author     Dimitry Andric <dim@FreeBSD.org>  2015-05-27 18:44:32 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2015-05-27 18:44:32 +0000
commit     5a5ac124e1efaf208671f01c46edb15f29ed2a0b
tree       a6140557876943cdd800ee997c9317283394b22c  /lib/Target/Hexagon
parent     f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801

Vendor import of llvm trunk r238337 (tag: vendor/llvm/llvm-trunk-r238337)

Notes:
    svn path=/vendor/llvm/dist/; revision=283625
    svn path=/vendor/llvm/llvm-trunk-r238337/; revision=283626; tag=vendor/llvm/llvm-trunk-r238337
Diffstat (limited to 'lib/Target/Hexagon')
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 40
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 19
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 42
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 41
-rwxr-xr-x  lib/Target/Hexagon/HexagonAsmPrinter.h | 9
-rw-r--r--  lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 16
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.cpp | 206
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.h | 187
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 207
-rw-r--r--  lib/Target/Hexagon/HexagonExpandCondsets.cpp | 1348
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 195
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 136
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 1416
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 99
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 953
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1821
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 1778
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 180
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 95
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 94
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 866
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 43
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 2835
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV3.td | 142
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 2848
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV5.td | 1095
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoVector.td | 483
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 4530
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 4
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV3.td | 51
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV4.td | 578
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV5.td | 506
-rw-r--r--  lib/Target/Hexagon/HexagonMCInstLower.cpp | 13
-rw-r--r--  lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 9
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 8
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 56
-rw-r--r--  lib/Target/Hexagon/HexagonOperands.td | 505
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 9
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 323
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 49
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.td | 8
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 100
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 235
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 24
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 26
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 51
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 3
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.h | 11
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 166
-rw-r--r--  lib/Target/Hexagon/HexagonVarargsCallingConvention.h | 149
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 9
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h | 137
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 53
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h | 15
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 467
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 18
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 223
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 108
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 248
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 122
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 39
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 8
-rw-r--r--  lib/Target/Hexagon/Makefile | 18
69 files changed, 13813 insertions, 12301 deletions
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index af7914f30366..758ccc741007 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -13,9 +13,9 @@ add_public_tablegen_target(HexagonCommonTableGen)
add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
- HexagonCallingConvLower.cpp
HexagonCFGOptimizer.cpp
HexagonCopyToCombine.cpp
+ HexagonExpandCondsets.cpp
HexagonExpandPredSpillCode.cpp
HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
@@ -32,7 +32,6 @@ add_llvm_target(HexagonCodeGen
HexagonRemoveSZExtArgs.cpp
HexagonSelectionDAGInfo.cpp
HexagonSplitConst32AndConst64.cpp
- HexagonSplitTFRCondSets.cpp
HexagonSubtarget.cpp
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 44f9d93e8fc7..a60d1e471944 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@@ -51,6 +51,8 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, void const *Decoder);
static const uint16_t IntRegDecoderTable[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
@@ -67,7 +69,7 @@ Hexagon::P2, Hexagon::P3 };
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
const uint16_t Table[], size_t Size) {
if (RegNo < Size) {
- Inst.addOperand(MCOperand::CreateReg(Table[RegNo]));
+ Inst.addOperand(MCOperand::createReg(Table[RegNo]));
return MCDisassembler::Success;
}
else
@@ -81,7 +83,7 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -101,7 +103,31 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = CtrlRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/, void const *Decoder) {
+ static const uint16_t CtrlReg64DecoderTable[] = {
+ Hexagon::C1_0, Hexagon::NoRegister,
+ Hexagon::C3_2, Hexagon::NoRegister,
+ Hexagon::NoRegister, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister,
+ Hexagon::C9_8, Hexagon::NoRegister,
+ Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister,
+ Hexagon::UPC, Hexagon::NoRegister
+ };
+
+ if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -118,7 +144,7 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
default:
return MCDisassembler::Fail;
}
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -143,7 +169,7 @@ static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = PredRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -176,6 +202,6 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Remove parse bits.
insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK);
DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
- HexagonMCInst::AppendImplicitOperands(MI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MI);
return Result;
}
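
A minimal standalone sketch of the table-driven decode pattern the hunks above follow, including the new DecodeCtrRegs64RegisterClass: index a fixed table by the encoded register number, and fail both for out-of-range encodings and for holes marked NoRegister. All names and values here are placeholders, not the LLVM API.

#include <cstddef>
#include <cstdint>

static const uint16_t NoRegister = 0;
// Placeholder table; the real decoders map encodings to Hexagon::* registers.
static const uint16_t DecoderTable[] = { 11, NoRegister, 13, 14 };

static bool decodeReg(unsigned RegNo, uint16_t &Out) {
  if (RegNo >= sizeof(DecoderTable) / sizeof(DecoderTable[0]))
    return false;                  // encoding out of range -> decode fails
  if (DecoderTable[RegNo] == NoRegister)
    return false;                  // hole in the encoding space
  Out = DecoderTable[RegNo];
  return true;
}
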
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 64ae69c60e5d..dfe79f9ff7b0 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -21,26 +21,23 @@
namespace llvm {
class FunctionPass;
- class ModulePass;
- class TargetMachine;
- class MachineInstr;
- class HexagonMCInst;
class HexagonAsmPrinter;
class HexagonTargetMachine;
+ class MachineInstr;
+ class MCInst;
+ class ModulePass;
class raw_ostream;
+ class TargetMachine;
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer();
- FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonSplitConst32AndConst64(
- const HexagonTargetMachine &TM);
- FunctionPass *createHexagonExpandPredSpillCode(
- const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonFixupHwLoops();
@@ -58,7 +55,7 @@ namespace llvm {
TargetAsmBackend *createHexagonAsmBackend(const Target &,
const std::string &);
*/
- void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
+ void HexagonLowerToMC(MachineInstr const *MI, MCInst &MCI,
HexagonAsmPrinter &AP);
} // end namespace llvm;
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 2068c6da4e48..53a687c337ec 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -21,31 +21,17 @@ include "llvm/Target/Target.td"
// Hexagon Subtarget features.
//===----------------------------------------------------------------------===//
-// Hexagon Archtectures
-def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
- "Hexagon v2">;
-def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
- "Hexagon v3">;
-def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
- "Hexagon v4">;
-def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
- "Hexagon v5">;
+// Hexagon Architectures
+def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
+def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
-def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
-def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
-def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
-def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
-def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
-def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -182,14 +168,6 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
-def getRegShlForm : InstrMapping {
- let FilterClass = "ImmRegShl";
- let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
- let ColFields = ["InputType"];
- let KeyCol = ["imm"];
- let ValueCols = [["reg"]];
-}
-
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -210,8 +188,10 @@ class Proc<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features>
: ProcessorModel<Name, Model, Features>;
-def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
-def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
+def : Proc<"hexagonv4", HexagonModelV4,
+ [ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4,
+ [ArchV4, ArchV5]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
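
The predicate strings above are spliced verbatim into TableGen-generated C++, so the switch from "Subtarget." to "HST->" assumes a subtarget pointer named HST is in scope where the predicates expand. A hedged sketch of how such a predicate is evaluated; the stub type and surrounding function are illustrative only.

struct HexagonSubtargetStub {      // stand-in for HexagonSubtarget
  bool HasV5T;
  bool hasV5TOps() const { return HasV5T; }
};

// Roughly what the generated matcher does with Predicate "HasV5T".
static bool checkHasV5T(const HexagonSubtargetStub *HST) {
  return HST->hasV5TOps();
}
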
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 50f2eca63693..e9491baf29ef 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -19,7 +19,7 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -61,6 +61,10 @@ static cl::opt<bool> AlignCalls(
"hexagon-align-calls", cl::Hidden, cl::init(true),
cl::desc("Insert falign after call instruction for Hexagon target"));
+HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
+
void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
@@ -195,44 +199,29 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned Size = BundleMIs.size();
assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
- HexagonMCInst MCI;
+ MCInst MCI;
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
- HexagonMCInst::AppendImplicitOperands(MCI);
- MCI.setPacketBegin(Index == 0);
- MCI.setPacketEnd(Index == (Size - 1));
- EmitToStreamer(OutStreamer, MCI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MCI);
+ HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0);
+ HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1));
+ EmitToStreamer(*OutStreamer, MCI);
}
}
else {
- HexagonMCInst MCI;
+ MCInst MCI;
HexagonLowerToMC(MI, MCI, *this);
- HexagonMCInst::AppendImplicitOperands(MCI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MCI);
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setPacketBegin(true);
- MCI.setPacketEnd(true);
+ HexagonMCInstrInfo::setPacketBegin(MCI, true);
+ HexagonMCInstrInfo::setPacketEnd(MCI, true);
}
- EmitToStreamer(OutStreamer, MCI);
+ EmitToStreamer(*OutStreamer, MCI);
}
return;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- if (SyntaxVariant == 0)
- return(new HexagonInstPrinter(MAI, MII, MRI));
- else
- return nullptr;
-}
-
extern "C" void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
- createHexagonMCInstPrinter);
}
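
Hexagon is a VLIW target, and EmitInstruction above marks packet boundaries when lowering a bundle: the first sub-instruction opens the packet and the last one closes it. A simplified sketch of that marking loop with stand-in types (not the MC API):

#include <vector>

struct InstStub { bool PacketBegin = false; bool PacketEnd = false; };

static void markPacket(std::vector<InstStub> &Bundle) {
  for (size_t Index = 0, Size = Bundle.size(); Index < Size; ++Index) {
    Bundle[Index].PacketBegin = (Index == 0);        // opens the packet
    Bundle[Index].PacketEnd = (Index == Size - 1);   // closes the packet
  }
}
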
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 5f4c162b0070..792fc8b7af3a 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -25,9 +25,12 @@ namespace llvm {
const HexagonSubtarget *Subtarget;
public:
- explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ explicit HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ bool runOnMachineFunction(MachineFunction &Fn) override {
+ Subtarget = &Fn.getSubtarget<HexagonSubtarget>();
+ return AsmPrinter::runOnMachineFunction(Fn);
}
const char *getPassName() const override {
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 307adad095c8..703e691e612f 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -37,15 +37,11 @@ namespace {
class HexagonCFGOptimizer : public MachineFunctionPass {
private:
- const HexagonTargetMachine& QTM;
- const HexagonSubtarget &QST;
-
void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
public:
static char ID;
- HexagonCFGOptimizer(const HexagonTargetMachine& TM)
- : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ HexagonCFGOptimizer() : MachineFunctionPass(ID) {
initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -72,7 +68,8 @@ static bool IsUnconditionalJump(int Opc) {
void
HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
MachineBasicBlock* NewTarget) {
- const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII =
+ MI->getParent()->getParent()->getSubtarget().getInstrInfo();
int NewOpcode = 0;
switch(MI->getOpcode()) {
case Hexagon::J2_jumpt:
@@ -95,13 +92,12 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
llvm_unreachable("Cannot handle this case");
}
- MI->setDesc(QII->get(NewOpcode));
+ MI->setDesc(TII->get(NewOpcode));
MI->getOperand(1).setMBB(NewTarget);
}
bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
-
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
@@ -248,6 +244,6 @@ void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) {
CALL_ONCE_INITIALIZATION(initializePassOnce)
}
-FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) {
- return new HexagonCFGOptimizer(TM);
+FunctionPass *llvm::createHexagonCFGOptimizer() {
+ return new HexagonCFGOptimizer();
}
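
InvertAndChangeJumpTarget flips the sense of a conditional jump and retargets it; the opcode mapping is a fixed switch. A reduced sketch with placeholder opcodes (the real pass handles several jump flavors and asserts on anything else):

enum Opcode { J2_jumpt, J2_jumpf, Unknown };

static Opcode invertJump(Opcode Opc) {
  switch (Opc) {
  case J2_jumpt: return J2_jumpf;  // jump-if-true -> jump-if-false
  case J2_jumpf: return J2_jumpt;
  default:       return Unknown;   // the real code hits llvm_unreachable
  }
}
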
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
deleted file mode 100644
index 8d78409aa01d..000000000000
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Hexagon_CCState class, used for lowering and
-// implementing calling conventions. Adapted from the machine independent
-// version of the class (CCState) but this handles calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-
-#include "HexagonCallingConvLower.h"
-#include "Hexagon.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-using namespace llvm;
-
-Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
- const TargetMachine &tm,
- SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &c)
- : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) {
- // No stack is used.
- StackOffset = 0;
-
- UsedRegs.resize(
- (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32);
-}
-
-// HandleByVal - Allocate a stack slot large enough to pass an argument by
-// value. The size and alignment information of the argument is encoded in its
-// parameter attribute.
-void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign,
- ISD::ArgFlagsTy ArgFlags) {
- unsigned Align = ArgFlags.getByValAlign();
- unsigned Size = ArgFlags.getByValSize();
- if (MinSize > (int)Size)
- Size = MinSize;
- if (MinAlign > (int)Align)
- Align = MinAlign;
- unsigned Offset = AllocateStack(Size, Align);
-
- addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
- LocVT.getSimpleVT(), LocInfo));
-}
-
-/// MarkAllocated - Mark a register and all of its aliases as allocated.
-void Hexagon_CCState::MarkAllocated(unsigned Reg) {
- const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
- for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
- UsedRegs[*AI/32] |= 1 << (*AI&31);
-}
-
-/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
-/// incorporating info about the formals into this state.
-void
-Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
- &Ins,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
- unsigned NumArgs = Ins.size();
- unsigned i = 0;
-
- // If the function returns a small struct in registers, skip
- // over the first (dummy) argument.
- if (SretValueInRegs != 0) {
- ++i;
- }
-
-
- for (; i != NumArgs; ++i) {
- EVT ArgVT = Ins[i].VT;
- ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
- dbgs() << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
-/// incorporating info about the result values into this state.
-void
-Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
-
- // For Hexagon, Return small structures in registers.
- if (SretValueInRegs != 0) {
- if (SretValueInRegs <= 32) {
- unsigned Reg = Hexagon::R0;
- addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
- CCValAssign::Full));
- return;
- }
- if (SretValueInRegs <= 64) {
- unsigned Reg = Hexagon::D0;
- addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
- CCValAssign::Full));
- return;
- }
- }
-
-
- // Determine which register each value should be copied into.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
- dbgs() << "Return operand #" << i << " has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-
-/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
-/// about the passed values into this state.
-void
-Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- Hexagon_CCAssignFn Fn,
- int NonVarArgsParams,
- unsigned SretValueSize) {
- unsigned NumOps = Outs.size();
-
- unsigned i = 0;
- // If the called function returns a small struct in registers, skip
- // the first actual parameter. We do not want to pass a pointer to
- // the stack location.
- if (SretValueSize != 0) {
- ++i;
- }
-
- for (; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].VT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
- NonVarArgsParams, i+1, false)) {
- dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallOperands - Same as above except it takes vectors of types
-/// and argument flags.
-void
-Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
- Hexagon_CCAssignFn Fn) {
- unsigned NumOps = ArgVTs.size();
- for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = ArgVTs[i];
- ISD::ArgFlagsTy ArgFlags = Flags[i];
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
- false)) {
- dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
-/// incorporating info about the passed values into this state.
-void
-Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
-
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT VT = Ins[i].VT;
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
- dbgs() << "Call result #" << i << " has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallResult - Same as above except it's specialized for calls which
-/// produce a single value.
-void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
- if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
- false)) {
- dbgs() << "Call result has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
-}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
deleted file mode 100644
index 738ed1a52a09..000000000000
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ /dev/null
@@ -1,187 +0,0 @@
-//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Hexagon_CCState class, used for lowering
-// and implementing calling conventions. Adapted from the target independent
-// version but this handles calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
-#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-
-//
-// Need to handle varargs.
-//
-namespace llvm {
- class TargetRegisterInfo;
- class TargetMachine;
- class Hexagon_CCState;
- class SDNode;
- struct EVT;
-
-/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
-/// State to reflect the change.
-typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem);
-
-
-/// CCState - This class holds information needed while lowering arguments and
-/// return values. It captures which registers are already assigned and which
-/// stack slots are used. It provides accessors to allocate these values.
-class Hexagon_CCState {
- CallingConv::ID CallingConv;
- bool IsVarArg;
- const TargetMachine &TM;
- SmallVectorImpl<CCValAssign> &Locs;
- LLVMContext &Context;
-
- unsigned StackOffset;
- SmallVector<uint32_t, 16> UsedRegs;
-public:
- Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &c);
-
- void addLoc(const CCValAssign &V) {
- Locs.push_back(V);
- }
-
- LLVMContext &getContext() const { return Context; }
- const TargetMachine &getTarget() const { return TM; }
- unsigned getCallingConv() const { return CallingConv; }
- bool isVarArg() const { return IsVarArg; }
-
- unsigned getNextStackOffset() const { return StackOffset; }
-
- /// isAllocated - Return true if the specified register (or an alias) is
- /// allocated.
- bool isAllocated(unsigned Reg) const {
- return UsedRegs[Reg/32] & (1 << (Reg&31));
- }
-
- /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
- /// incorporating info about the formals into this state.
- void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
- /// incorporating info about the result values into this state.
- void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
- /// about the passed values into this state.
- void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn, int NonVarArgsParams,
- unsigned SretValueSize);
-
- /// AnalyzeCallOperands - Same as above except it takes vectors of types
- /// and argument flags.
- void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
- Hexagon_CCAssignFn Fn);
-
- /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
- /// incorporating info about the passed values into this state.
- void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeCallResult - Same as above except it's specialized for calls which
- /// produce a single value.
- void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
-
- /// getFirstUnallocated - Return the first unallocated register in the set, or
- /// NumRegs if they are all allocated.
- unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
- for (unsigned i = 0; i != NumRegs; ++i)
- if (!isAllocated(Regs[i]))
- return i;
- return NumRegs;
- }
-
- /// AllocateReg - Attempt to allocate one register. If it is not available,
- /// return zero. Otherwise, return the register, marking it and any aliases
- /// as allocated.
- unsigned AllocateReg(unsigned Reg) {
- if (isAllocated(Reg)) return 0;
- MarkAllocated(Reg);
- return Reg;
- }
-
- /// Version of AllocateReg with extra register to be shadowed.
- unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
- if (isAllocated(Reg)) return 0;
- MarkAllocated(Reg);
- MarkAllocated(ShadowReg);
- return Reg;
- }
-
- /// AllocateReg - Attempt to allocate one of the specified registers. If none
- /// are available, return zero. Otherwise, return the first one available,
- /// marking it and any aliases as allocated.
- unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
- unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
- if (FirstUnalloc == NumRegs)
- return 0; // Didn't find the reg.
-
- // Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc];
- MarkAllocated(Reg);
- return Reg;
- }
-
- /// Version of AllocateReg with list of registers to be shadowed.
- unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
- unsigned NumRegs) {
- unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
- if (FirstUnalloc == NumRegs)
- return 0; // Didn't find the reg.
-
- // Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
- MarkAllocated(Reg);
- MarkAllocated(ShadowReg);
- return Reg;
- }
-
- /// AllocateStack - Allocate a chunk of stack space with the specified size
- /// and alignment.
- unsigned AllocateStack(unsigned Size, unsigned Align) {
- assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
- StackOffset = ((StackOffset + Align-1) & ~(Align-1));
- unsigned Result = StackOffset;
- StackOffset += Size;
- return Result;
- }
-
- // HandleByVal - Allocate a stack slot large enough to pass an argument by
- // value. The size and alignment information of the argument is encoded in its
- // parameter attribute.
- void HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
-
-private:
- /// MarkAllocated - Mark a register and all of its aliases as allocated.
- void MarkAllocated(unsigned Reg);
-};
-
-
-
-} // end namespace llvm
-
-#endif
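
The deleted AllocateStack rounds the running stack offset up to the requested power-of-two alignment with the usual mask trick. The computation, extracted as a standalone helper under the same power-of-two assumption the original assert enforces:

#include <cassert>

static unsigned alignUp(unsigned Offset, unsigned Align) {
  assert(Align && ((Align - 1) & Align) == 0 && "Align must be a power of 2");
  return (Offset + Align - 1) & ~(Align - 1);  // e.g. alignUp(13, 8) == 16
}
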
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 1883ad8f2e51..1d6455c66fa5 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -116,38 +116,34 @@ static bool isCombinableInstType(MachineInstr *MI,
switch(MI->getOpcode()) {
case Hexagon::A2_tfr: {
// A COPY instruction can be combined if its arguments are IntRegs (32bit).
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg());
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DestReg = Op0.getReg();
+ unsigned SrcReg = Op1.getReg();
return Hexagon::IntRegsRegClass.contains(DestReg) &&
- Hexagon::IntRegsRegClass.contains(SrcReg);
+ Hexagon::IntRegsRegClass.contains(SrcReg);
}
case Hexagon::A2_tfrsi: {
// A transfer-immediate can be combined if its argument is a signed 8bit
// value.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- unsigned DestReg = MI->getOperand(0).getReg();
-
- // Only combine constant extended TFRI if we are in aggressive mode.
- return Hexagon::IntRegsRegClass.contains(DestReg) &&
- (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm()));
- }
-
- case Hexagon::TFRI_V4: {
- if (!ShouldCombineAggressively)
- return false;
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal());
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+ unsigned DestReg = Op0.getReg();
// Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
// workaround for an ABI bug that prevents GOT relocations on combine
// instructions
- if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG)
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
return false;
- unsigned DestReg = MI->getOperand(0).getReg();
- return Hexagon::IntRegsRegClass.contains(DestReg);
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ (ShouldCombineAggressively || NotExt);
}
default:
@@ -157,13 +153,14 @@ static bool isCombinableInstType(MachineInstr *MI,
return false;
}
-static bool isGreaterThan8BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::A2_tfrsi &&
- !isInt<8>(I->getOperand(1).getImm());
-}
-static bool isGreaterThan6BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::A2_tfrsi &&
- !isUInt<6>(I->getOperand(1).getImm());
+template <unsigned N>
+static bool isGreaterThanNBitTFRI(const MachineInstr *I) {
+ if (I->getOpcode() == Hexagon::TFRI64_V4 ||
+ I->getOpcode() == Hexagon::A2_tfrsi) {
+ const MachineOperand &Op = I->getOperand(1);
+ return !Op.isImm() || !isInt<N>(Op.getImm());
+ }
+ return false;
}
/// areCombinableOperations - Returns true if the two instruction can be merge
@@ -171,31 +168,17 @@ static bool isGreaterThan6BitTFRI(MachineInstr *I) {
static bool areCombinableOperations(const TargetRegisterInfo *TRI,
MachineInstr *HighRegInst,
MachineInstr *LowRegInst) {
- assert((HighRegInst->getOpcode() == Hexagon::A2_tfr ||
- HighRegInst->getOpcode() == Hexagon::A2_tfrsi ||
- HighRegInst->getOpcode() == Hexagon::TFRI_V4) &&
- (LowRegInst->getOpcode() == Hexagon::A2_tfr ||
- LowRegInst->getOpcode() == Hexagon::A2_tfrsi ||
- LowRegInst->getOpcode() == Hexagon::TFRI_V4) &&
+ unsigned HiOpc = HighRegInst->getOpcode();
+ unsigned LoOpc = LowRegInst->getOpcode();
+ (void)HiOpc; // Fix compiler warning
+ (void)LoOpc; // Fix compiler warning
+ assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
+ (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
"Assume individual instructions are of a combinable type");
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(TRI);
-
- // V4 added some combine variations (mixed immediate and register source
- // operands), if we are on < V4 we can only combine 2 register-to-register
- // moves and 2 immediate-to-register moves. We also don't have
- // constant-extenders.
- if (!QRI->Subtarget.hasV4TOps())
- return HighRegInst->getOpcode() == LowRegInst->getOpcode() &&
- !isGreaterThan8BitTFRI(HighRegInst) &&
- !isGreaterThan6BitTFRI(LowRegInst);
-
// There is no combine of two constant extended values.
- if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 ||
- isGreaterThan8BitTFRI(HighRegInst)) &&
- (LowRegInst->getOpcode() == Hexagon::TFRI_V4 ||
- isGreaterThan6BitTFRI(LowRegInst)))
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) &&
+ isGreaterThanNBitTFRI<6>(LowRegInst))
return false;
return true;
@@ -222,10 +205,14 @@ static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
unsigned DestReg,
const TargetRegisterInfo *TRI) {
return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
- I->modifiesRegister(DestReg, TRI) ||
- I->readsRegister(DestReg, TRI) ||
- I->hasUnmodeledSideEffects() ||
- I->isInlineAsm() || I->isDebugValue();
+ I->modifiesRegister(DestReg, TRI) ||
+ I->readsRegister(DestReg, TRI) ||
+ I->hasUnmodeledSideEffects() ||
+ I->isInlineAsm() || I->isDebugValue();
+}
+
+static unsigned UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : 0;
}
/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
@@ -235,9 +222,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
unsigned I1DestReg,
unsigned I2DestReg,
bool &DoInsertAtI1) {
-
- bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal();
- unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg();
+ unsigned I2UseReg = UseReg(I2->getOperand(1));
// It is not safe to move I1 and I2 into one combine if I2 has a true
// dependence on I1.
@@ -301,8 +286,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
// At O3 we got better results (dhrystone) by being more conservative here.
if (!ShouldCombineAggressively)
End = std::next(MachineBasicBlock::iterator(I2));
- IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal();
- unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg();
+ unsigned I1UseReg = UseReg(I1->getOperand(1));
// Track killed operands. If we move across an instruction that kills our
// operand, we need to update the kill information on the moved I1. It kills
// the operand now.
@@ -418,7 +402,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
// Get target info.
TRI = MF.getSubtarget().getRegisterInfo();
- TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
// Combine aggressively (for code size)
ShouldCombineAggressively =
@@ -561,7 +545,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
- // Handle globals.
+ // Handle globals.
if (HiOperand.isGlobal()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
@@ -577,17 +561,64 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
return;
}
- // Handle constant extended immediates.
- if (!isInt<8>(HiOperand.getImm())) {
- assert(isInt<8>(LoOperand.getImm()));
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
.addImm(LoOperand.getImm());
return;
}
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // First preference should be given to Hexagon::A2_combineii instruction
+ // as it can include U6 (in Hexagon::A4_combineii) as well.
+ // In this instruction, HiOperand is const extended, if required.
+ if (isInt<8>(LoOperand.getImm())) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addImm(LoOperand.getImm());
+ return;
+ }
- if (!isUInt<6>(LoOperand.getImm())) {
- assert(isInt<8>(HiOperand.getImm()));
+ // In this instruction, LoOperand is const extended, if required.
+ if (isInt<8>(HiOperand.getImm())) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
@@ -611,7 +642,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
- // Handle global.
+ // Handle globals.
if (HiOperand.isGlobal()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
@@ -619,6 +650,29 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
.addReg(LoReg, LoRegKillFlag);
return;
}
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
// Insert new combine instruction.
// DoubleRegDest = combine #HiImm, LoReg
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
@@ -644,6 +698,29 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
LoOperand.getTargetFlags());
return;
}
+ // Handle block addresses.
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle jump tables.
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle constant pools.
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, #LoImm
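
The two hand-written range checks are folded into one template above: isGreaterThanNBitTFRI<8> replaces isGreaterThan8BitTFRI, and the <6> instantiation replaces the 6-bit variant. The underlying signed-range test behaves like LLVM's isInt<N>, sketched standalone here:

#include <cstdint>

// Does V fit in an N-bit signed integer? Mirrors llvm::isInt<N>.
// fitsSignedBits<8>(127) is true; fitsSignedBits<8>(128) is false.
template <unsigned N>
static bool fitsSignedBits(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}
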
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
new file mode 100644
index 000000000000..37ed173a79cd
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -0,0 +1,1348 @@
+// Replace mux instructions with the corresponding legal instructions.
+// It is meant to work post-SSA, but still on virtual registers. It was
+// originally placed between register coalescing and machine instruction
+// scheduler.
+// In this place in the optimization sequence, live interval analysis had
+// been performed, and the live intervals should be preserved. A large part
+// of the code deals with preserving the liveness information.
+//
+// Liveness tracking aside, the main functionality of this pass is divided
+// into two steps. The first step is to replace an instruction
+// vreg0 = C2_mux vreg0, vreg1, vreg2
+// with a pair of conditional transfers
+// vreg0 = A2_tfrt vreg0, vreg1
+// vreg0 = A2_tfrf vreg0, vreg2
+// It is the intention that the execution of this pass could be terminated
+// after this step, and the code generated would be functionally correct.
+//
+// If the uses of the source values vreg1 and vreg2 are kills, and their
+// definitions are predicable, then in the second step, the conditional
+// transfers will then be rewritten as predicated instructions. E.g.
+// vreg0 = A2_or vreg1, vreg2
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// will be rewritten as
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// This replacement has two variants: "up" and "down". Consider this case:
+// vreg0 = A2_or vreg1, vreg2
+// ... [intervening instructions] ...
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// variant "up":
+// vreg3 = A2_port vreg99, vreg1, vreg2
+// ... [intervening instructions, vreg0->vreg3] ...
+// [deleted]
+// variant "down":
+// [deleted]
+// ... [intervening instructions] ...
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// Both, one or none of these variants may be valid, and checks are made
+// to rule out inapplicable variants.
+//
+// As an additional optimization, before either of the two steps above is
+// executed, the pass attempts to coalesce the target register with one of
+// the source registers, e.g. given an instruction
+// vreg3 = C2_mux vreg0, vreg1, vreg2
+// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
+// the instruction would then be (for example)
+// vreg3 = C2_mux vreg0, vreg3, vreg2
+// and, under certain circumstances, this could result in only one predicated
+// instruction:
+// vreg3 = A2_tfrf vreg0, vreg2
+//
+
+#define DEBUG_TYPE "expand-condsets"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
+static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
+
+namespace llvm {
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ FunctionPass *createHexagonExpandCondsets();
+}
+
+namespace {
+ class HexagonExpandCondsets : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandCondsets() :
+ MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
+ LIS(0), CoaLimitActive(false),
+ TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
+ if (OptCoaLimit.getPosition())
+ CoaLimitActive = true, CoaLimit = OptCoaLimit;
+ if (OptTfrLimit.getPosition())
+ TfrLimitActive = true, TfrLimit = OptTfrLimit;
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Expand Condsets";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ const HexagonInstrInfo *HII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ bool CoaLimitActive, TfrLimitActive;
+ unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter;
+
+ struct RegisterRef {
+ RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
+ Sub(Op.getSubReg()) {}
+ RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+ bool operator== (RegisterRef RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (RegisterRef RR) const { return !operator==(RR); }
+ unsigned Reg, Sub;
+ };
+
+ typedef DenseMap<unsigned,unsigned> ReferenceMap;
+ enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
+ enum { Exec_Then = 0x10, Exec_Else = 0x20 };
+ unsigned getMaskForSub(unsigned Sub);
+ bool isCondset(const MachineInstr *MI);
+
+ void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
+ bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
+
+ LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S);
+ LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S);
+ void makeDefined(unsigned Reg, SlotIndex S, bool SetDef);
+ void makeUndead(unsigned Reg, SlotIndex S);
+ void shrinkToUses(unsigned Reg, LiveInterval &LI);
+ void updateKillFlags(unsigned Reg, LiveInterval &LI);
+ void terminateSegment(LiveInterval::iterator LT, SlotIndex S,
+ LiveInterval &LI);
+ void addInstrToLiveness(MachineInstr *MI);
+ void removeInstrFromLiveness(MachineInstr *MI);
+
+ unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
+ MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR,
+ unsigned DstSR, const MachineOperand &PredOp, bool Cond);
+ bool split(MachineInstr *MI);
+ bool splitInBlock(MachineBasicBlock &B);
+
+ bool isPredicable(MachineInstr *MI);
+ MachineInstr *getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
+ bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses);
+ bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown);
+ void predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond);
+ void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
+ bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last);
+ bool predicate(MachineInstr *TfrI, bool Cond);
+ bool predicateInBlock(MachineBasicBlock &B);
+
+ void postprocessUndefImplicitUses(MachineBasicBlock &B);
+ void removeImplicitUses(MachineInstr *MI);
+ void removeImplicitUses(MachineBasicBlock &B);
+
+ bool isIntReg(RegisterRef RR, unsigned &BW);
+ bool isIntraBlocks(LiveInterval &LI);
+ bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
+ bool coalesceSegments(MachineFunction &MF);
+ };
+}
+
+char HexagonExpandCondsets::ID = 0;
+
+
+unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
+ switch (Sub) {
+ case Hexagon::subreg_loreg:
+ return Sub_Low;
+ case Hexagon::subreg_hireg:
+ return Sub_High;
+ case Hexagon::NoSubRegister:
+ return Sub_None;
+ }
+ llvm_unreachable("Invalid subregister");
+}
+
+
+bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::MUX64_rr:
+ return true;
+ break;
+ }
+ return false;
+}
+
+
+void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ Map.insert(std::make_pair(RR.Reg, Mask));
+ else
+ F->second |= Mask;
+}
+
+
+bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ return false;
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ if (Mask & F->second)
+ return true;
+ return false;
+}
+
+
+LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI,
+ SlotIndex S) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->start >= S)
+ return I;
+ }
+ return LI.end();
+}
+
+
+LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI,
+ SlotIndex S) {
+ LiveInterval::iterator P = LI.end();
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->end > S)
+ return P;
+ P = I;
+ }
+ return P;
+}
+
+
+/// Find the implicit use of register Reg in slot index S, and make sure
+/// that the "defined" flag is set to SetDef. While the mux expansion is
+/// going on, predicated instructions will have implicit uses of the
+/// registers that are being defined. This is to keep any preceding
+/// definitions live. If there is no preceding definition, the implicit
+/// use will be marked as "undef", otherwise it will be "defined". This
+/// function is used to update the flag.
+void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S,
+ bool SetDef) {
+ if (!S.isRegister())
+ return;
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ assert(MI && "Expecting instruction");
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ bool IsDef = !Op.isUndef();
+ if (Op.isImplicit() && IsDef != SetDef)
+ Op.setIsUndef(!SetDef);
+ }
+}
+
+
+void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) {
+ // If S is a block boundary, then there can still be a dead def reaching
+ // this point. Instead of traversing the CFG, queue start points of all
+ // live segments that begin with a register, and end at a block boundary.
+ // This may "resurrect" some truly dead definitions, but doing so is
+ // harmless.
+ SmallVector<MachineInstr*,8> Defs;
+ if (S.isBlock()) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isRegister() || !I->end.isBlock())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(I->start);
+ Defs.push_back(MI);
+ }
+ } else if (S.isRegister()) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ Defs.push_back(MI);
+ }
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ MachineInstr *MI = Defs[i];
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ }
+}
+
+
+/// Shrink the segments in the live interval for a given register to the last
+/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this
+/// function will not mark any definitions of Reg as dead. The reason for this
+/// is that this function is used while a MUX instruction is being expanded,
+/// or while a conditional copy is undergoing predication. During these
+/// processes, there may be defs present in the instruction sequence that have
+/// not yet been removed, or there may be missing uses that have not yet been
+/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible,
+/// but since it does not extend any intervals that are too short, we need to
+/// pre-emptively extend them here in anticipation of further changes.
+void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) {
+ SmallVector<MachineInstr*,4> Deads;
+ LIS->shrinkToUses(&LI, &Deads);
+ // Need to undo the deadification made by "shrinkToUses". It's easier to
+ // do it here, since we have a list of all instructions that were just
+ // marked as dead.
+ for (unsigned i = 0, n = Deads.size(); i < n; ++i) {
+ MachineInstr *MI = Deads[i];
+ // Clear the "dead" flag.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ // Extend the live segment to the beginning of the next one.
+ LiveInterval::iterator End = LI.end();
+ SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval::iterator T = LI.FindSegmentContaining(S);
+ assert(T != End);
+ LiveInterval::iterator N = std::next(T);
+ if (N != End)
+ T->end = N->start;
+ else
+ T->end = LIS->getMBBEndIdx(MI->getParent());
+ }
+ updateKillFlags(Reg, LI);
+}
+
+
+/// Given an updated live interval LI for register Reg, update the kill flags
+/// in instructions using Reg to reflect the liveness changes.
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) {
+ MRI->clearKillFlags(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ SlotIndex EX = I->end;
+ if (!EX.isRegister())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(EX);
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ // Only set the kill flag on the first encountered use of Reg in this
+ // instruction.
+ Op.setIsKill(true);
+ break;
+ }
+ }
+}
+
+
+/// When adding a new instruction to liveness, the newly added definition
+/// will start a new live segment. This may happen at a position that falls
+/// within an existing live segment. In such a case, that live segment needs to
+/// be truncated to make room for the new segment. Ultimately, the truncation
+/// will occur at the last use, but for now the segment can be terminated
+/// right at the place where the new segment will start. The segments will be
+/// shrunk-to-uses later.
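+/// For example, if a new definition is inserted at index X into a segment
+/// [A,B), the segment is truncated to [A,X) here, and the subsequent
+/// shrink-to-uses step moves the end back to the last use before X.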
+void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT,
+ SlotIndex S, LiveInterval &LI) {
+ // Terminate the live segment pointed to by LT within a live interval LI.
+ if (LT == LI.end())
+ return;
+
+ VNInfo *OldVN = LT->valno;
+ SlotIndex EX = LT->end;
+ LT->end = S;
+ // If LT does not end at a block boundary, the termination is done.
+ if (!EX.isBlock())
+ return;
+
+ // If LT ended at a block boundary, it's possible that its value number
+ // is picked up at the beginning of other blocks. Create a new value number
+ // and change such blocks to use it instead.
+ VNInfo *NewVN = nullptr;
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isBlock() || I->valno != OldVN)
+ continue;
+ // Generate on-demand a new value number that is defined by the
+ // block beginning (i.e. a phi).
+ if (!NewVN)
+ NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator());
+ I->valno = NewVN;
+ }
+}
+
+
+/// Add the specified instruction to live intervals. Neither the expansion
+/// of a MUX nor the predication of a copy is atomic, so this function is
+/// used to keep the live intervals up to date while the program code is
+/// being changed by these transformations.
+void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI)
+ : LIS->getInstructionIndex(MI);
+ DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI);
+
+ MX = MX.getRegSlot();
+ bool Predicated = HII->isPredicated(MI);
+ MachineBasicBlock *MB = MI->getParent();
+
+ // Strip all implicit uses from predicated instructions. They will be
+ // added again, according to the updated information.
+ if (Predicated)
+ removeImplicitUses(MI);
+
+ // For each def in MI we need to insert a new live segment starting at MX
+ // into the interval. If there already exists a live segment in the interval
+ // that contains MX, we need to terminate it at MX.
+ SmallVector<RegisterRef,2> Defs;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Defs.push_back(RegisterRef(Op));
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ unsigned DefR = Defs[i].Reg;
+ LiveInterval &LID = LIS->getInterval(DefR);
+ DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // If MX falls inside of an existing live segment, terminate it.
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ if (LT != LID.end())
+ terminateSegment(LT, MX, LID);
+ DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+ // Create a new segment starting from MX.
+ LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+ SlotIndex EX;
+ VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+ if (N == LID.end()) {
+ // There is no live segment after MX. End this segment at the end of
+ // the block.
+ EX = LIS->getMBBEndIdx(MB);
+ } else {
+ // If the next segment starts at the block boundary, end the new segment
+ // at the boundary of the preceding block (i.e. the previous index).
+ // Otherwise, end the segment at the beginning of the next segment. In
+ // either case it will be "shrunk-to-uses" later.
+ EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+ }
+ if (Predicated) {
+ // A predicated instruction will have an implicit use of the defined
+ // register. This is necessary so that this definition will not make
+ // any previous definitions dead. If there are no previous live
+ // segments, still add the implicit use, but make it "undef".
+ // Because of the implicit use, the preceding definition is not
+ // dead. Mark it as such (if necessary).
+ MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+ ImpUse.setSubReg(Defs[i].Sub);
+ bool Undef = false;
+ if (P == LID.end())
+ Undef = true;
+ else {
+ // If the previous segment extends to the end of the previous block,
+ // the end index may actually be the beginning of this block. If
+ // the previous segment ends at a block boundary, move it back by one,
+ // to get the proper block for it.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+ Undef = true;
+ }
+ if (!Undef) {
+ makeUndead(DefR, P->valno->def);
+ // We are adding a live use, so extend the previous segment to
+ // include it.
+ P->end = MX;
+ } else {
+ ImpUse.setIsUndef(true);
+ }
+
+ if (!MI->readsRegister(DefR))
+ MI->addOperand(ImpUse);
+ if (N != LID.end())
+ makeDefined(DefR, N->start, true);
+ }
+ LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+ LID.addSegment(NR);
+ DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+ shrinkToUses(DefR, LID);
+ DEBUG(dbgs() << "updated imp-uses: " << *MI);
+ LID.verify();
+ }
+
+ // For each use in MI:
+ // - If there is no live segment that contains MX for the used register,
+ // extend the previous one. Ignore implicit uses.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+ continue;
+ unsigned UseR = Op.getReg();
+ LiveInterval &LIU = LIS->getInterval(UseR);
+ // Find the last segment P that starts before MX.
+ LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+ if (P == LIU.end())
+ P = prevSegment(LIU, MX);
+
+ assert(P != LIU.end() && "MI uses undefined register?");
+ SlotIndex EX = P->end;
+ // If P contains MX, there is not much to do.
+ if (EX > MX) {
+ Op.setIsKill(false);
+ continue;
+ }
+ // Otherwise, extend P to "next(MX)".
+ P->end = MX.getNextIndex();
+ Op.setIsKill(true);
+ // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX))
+ KI->clearRegisterKills(UseR, nullptr);
+ shrinkToUses(UseR, LIU);
+ LIU.verify();
+ }
+}
+
+
+/// Update the live interval information to reflect the removal of the given
+/// instruction from the program. As with "addInstrToLiveness", this function
+/// is called while the program code is being changed.
+void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot();
+ DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI);
+
+ // For each def in MI:
+ // If MI starts a live segment, merge this segment with the previous segment.
+ //
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned DefR = Op.getReg();
+ LiveInterval &LID = LIS->getInterval(DefR);
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ assert(LT != LID.end() && "Expecting live segments");
+ DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ if (LT->start != MX)
+ continue;
+
+ VNInfo *MVN = LT->valno;
+ if (LT != LID.begin()) {
+ // If the current live segment is not the first, the task is easy. If
+ // the previous segment continues into the current block, extend it to
+ // the end of the current one, and merge the value numbers.
+ // Otherwise, remove the current segment, and make the end of it "undef".
+ LiveInterval::iterator P = std::prev(LT);
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *MB = MI->getParent();
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) {
+ makeDefined(DefR, LT->end, false);
+ LID.removeSegment(*LT);
+ } else {
+ // Make the segments adjacent, so that merge-vn can also merge the
+ // segments.
+ P->end = LT->start;
+ makeUndead(DefR, P->valno->def);
+ LID.MergeValueNumberInto(MVN, P->valno);
+ }
+ } else {
+ LiveInterval::iterator N = std::next(LT);
+ LiveInterval::iterator RmB = LT, RmE = N;
+ while (N != LID.end()) {
+ // Iterate until the first register-based definition is found
+ // (i.e. skip all block-boundary entries).
+ LiveInterval::iterator Next = std::next(N);
+ if (N->start.isRegister()) {
+ makeDefined(DefR, N->start, false);
+ break;
+ }
+ if (N->end.isRegister()) {
+ makeDefined(DefR, N->end, false);
+ RmE = Next;
+ break;
+ }
+ RmE = Next;
+ N = Next;
+ }
+ // Erase the segments in one shot to avoid invalidating iterators.
+ LID.segments.erase(RmB, RmE);
+ }
+
+ bool VNUsed = false;
+ for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) {
+ if (I->valno != MVN)
+ continue;
+ VNUsed = true;
+ break;
+ }
+ if (!VNUsed)
+ MVN->markUnused();
+
+ DEBUG(dbgs() << "new interval: ");
+ if (!LID.empty()) {
+ DEBUG(dbgs() << LID << "\n");
+ LID.verify();
+ } else {
+ DEBUG(dbgs() << "<empty>\n");
+ LIS->removeInterval(DefR);
+ }
+ }
+
+ // For uses there is nothing to do. The intervals will be updated via
+ // shrinkToUses.
+ SmallVector<unsigned,4> Uses;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.push_back(R);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+ LiveInterval &LI = LIS->getInterval(Uses[i]);
+ shrinkToUses(Uses[i], LI);
+ }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
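+/// 32-bit register sources yield A2_tfrt/A2_tfrf, 64-bit register sources
+/// yield A2_tfrpt/A2_tfrpf, and immediate sources yield C2_cmoveit/C2_cmoveif.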
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+ bool Cond) {
+ using namespace Hexagon;
+ if (SO.isReg()) {
+ unsigned PhysR;
+ RegisterRef RS = SO;
+ if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+ const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+ PhysR = RS.Reg;
+ }
+ unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+ switch (RC->getSize()) {
+ case 4:
+ return Cond ? A2_tfrt : A2_tfrf;
+ case 8:
+ return Cond ? A2_tfrpt : A2_tfrpf;
+ }
+ llvm_unreachable("Invalid register operand");
+ }
+ if (SO.isImm() || SO.isFPImm())
+ return Cond ? C2_cmoveit : C2_cmoveif;
+ llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+ unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+ MachineInstr *MI = SrcOp.getParent();
+ MachineBasicBlock &B = *MI->getParent();
+ MachineBasicBlock::iterator At = MI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Don't avoid identity copies here (i.e. if the source and the destination
+ // are the same registers). It is actually better to generate them here,
+ // since this would cause the copy to potentially be predicated in the next
+ // step. The predication will remove such a copy if it is unable to
+ // predicate it.
+
+ unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+ MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, RegState::Define, DstSR)
+ .addOperand(PredOp)
+ .addOperand(SrcOp);
+ // We don't want any kills yet.
+ TfrI->clearKillInfo();
+ DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+ return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
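+/// For example (with hypothetical virtual registers), a condset
+///   %vreg0 = C2_mux %vreg1, %vreg2, %vreg3
+/// is replaced with the pair
+///   %vreg0 = A2_tfrt %vreg1, %vreg2
+///   %vreg0 = A2_tfrf %vreg1, %vreg3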
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+ if (TfrLimitActive) {
+ if (TfrCounter >= TfrLimit)
+ return false;
+ TfrCounter++;
+ }
+ DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+ << ": " << *MI);
+ MachineOperand &MD = MI->getOperand(0); // Definition
+ MachineOperand &MP = MI->getOperand(1); // Predicate register
+ assert(MD.isDef());
+ unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+ // First, create the two individual conditional transfers, and add each
+ // of them to the live intervals information. Do that first and then remove
+ // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true))
+ addInstrToLiveness(TfrT);
+ if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false))
+ addInstrToLiveness(TfrF);
+ removeInstrFromLiveness(MI);
+
+ return true;
+}
+
+
+/// Split all MUX instructions in the given block into pairs of conditional
+/// transfers.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ if (isCondset(I))
+ Changed |= split(I);
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+ if (HII->isPredicated(MI) || !HII->isPredicable(MI))
+ return false;
+ if (MI->hasUnmodeledSideEffects() || MI->mayStore())
+ return false;
+ // Reject instructions with multiple defs (e.g. post-increment loads).
+ bool HasDef = false;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ if (HasDef)
+ return false;
+ HasDef = true;
+ }
+ for (auto &Mo : MI->memoperands())
+ if (Mo->isVolatile())
+ return false;
+ return true;
+}
+
+
+/// Find the reaching definition for a predicated use of RD. The RD is used
+/// under the conditions given by PredR and Cond, and this function will ignore
+/// definitions that set RD under the opposite conditions.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+ MachineBasicBlock &B = *UseIt->getParent();
+ MachineBasicBlock::iterator I = UseIt, S = B.begin();
+ if (I == S)
+ return nullptr;
+
+ bool PredValid = true;
+ do {
+ --I;
+ MachineInstr *MI = &*I;
+ // Check if this instruction can be ignored, i.e. if it is predicated
+ // on the complementary condition.
+ if (PredValid && HII->isPredicated(MI)) {
+ if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+ }
+
+ // Check the defs. If the PredR is defined, invalidate it. If RD is
+ // defined, return the instruction or nullptr, depending on the circumstances.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ RegisterRef RR = Op;
+ if (RR.Reg == PredR) {
+ PredValid = false;
+ continue;
+ }
+ if (RR.Reg != RD.Reg)
+ continue;
+ // If the "Reg" part agrees, there is still the subregister to check.
+ // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+ // not vreg1 (w/o subregisters).
+ if (RR.Sub == RD.Sub)
+ return MI;
+ if (RR.Sub == 0 || RD.Sub == 0)
+ return nullptr;
+ // We have different subregisters, so we can continue looking.
+ }
+ } while (I != S);
+
+ return nullptr;
+}
+
+
+/// Check if the instruction MI can be safely moved over a set of instructions
+/// whose side-effects (in terms of register defs and uses) are expressed in
+/// the maps Defs and Uses. These maps reflect the conditional defs and uses
+/// that depend on the same predicate register to allow moving instructions
+/// over instructions predicated on the opposite condition.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+ ReferenceMap &Uses) {
+ // In order to be able to safely move MI over instructions that define
+ // "Defs" and use "Uses", no def operand from MI can be defined or used
+ // and no use operand can be defined.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ RegisterRef RR = Op;
+ // For physical register we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value
+ // before the actual register rewriting (from virtual to physical).
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ // No redefs for any operand.
+ if (isRefInMap(RR, Defs, Exec_Then))
+ return false;
+ // For defs, there cannot be uses.
+ if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then))
+ return false;
+ }
+ return true;
+}
+
+
+/// Check if the instruction accessing memory (TheI) can be moved to the
+/// location ToI.
+bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI,
+ bool IsDown) {
+ bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore();
+ if (!IsLoad && !IsStore)
+ return true;
+ if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
+ return true;
+ if (TheI->hasUnmodeledSideEffects())
+ return false;
+
+ MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
+ MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
+ bool Ordered = TheI->hasOrderedMemoryRef();
+
+ // Search for aliased memory reference in (StartI, EndI).
+ for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+ bool L = MI->mayLoad(), S = MI->mayStore();
+ if (!L && !S)
+ continue;
+ if (Ordered && MI->hasOrderedMemoryRef())
+ return false;
+
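+ // A store being moved may conflict with any load or store in the range;
+ // a load being moved conflicts only with stores.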
+ bool Conflict = (L && IsStore) || S;
+ if (Conflict)
+ return false;
+ }
+ return true;
+}
+
+
+/// Generate a predicated version of MI (where the condition is given via
+/// PredR and Cond) at the point indicated by Where.
+void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) {
+ // The problem with updating live intervals is that we can move one def
+ // past another def. In particular, this can happen when moving an A2_tfrt
+ // over an A2_tfrf defining the same register. From the point of view of
+ // live intervals, these two instructions are two separate definitions,
+ // and each one starts another live segment. LiveIntervals's "handleMove"
+ // does not allow such moves, so we need to handle it ourselves. To avoid
+ // invalidating liveness data while we are using it, the move will be
+ // implemented in 4 steps: (1) add a clone of the instruction MI at the
+ // target location, (2) update liveness, (3) delete the old instruction,
+ // and (4) update liveness again.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
+ unsigned Opc = MI->getOpcode();
+ unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
+ MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
+ unsigned Ox = 0, NP = MI->getNumOperands();
+ // Skip all defs from MI first.
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isDef())
+ break;
+ Ox++;
+ }
+ // Add the new def, then the predicate register, then the rest of the
+ // operands.
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub);
+ MB.addReg(PredR);
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isImplicit())
+ MB.addOperand(MO);
+ Ox++;
+ }
+
+ MachineFunction &MF = *B.getParent();
+ MachineInstr::mmo_iterator I = MI->memoperands_begin();
+ unsigned NR = std::distance(I, MI->memoperands_end());
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
+ for (unsigned i = 0; i < NR; ++i)
+ MemRefs[i] = *I++;
+ MB.setMemRefs(MemRefs, MemRefs+NR);
+
+ MachineInstr *NewI = MB;
+ NewI->clearKillInfo();
+ addInstrToLiveness(NewI);
+}
+
+
+/// In the range [First, Last], rename all references to the "old" register RO
+/// to the "new" register RN, but only in instructions predicated on the given
+/// condition.
+void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
+ unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last) {
+ MachineBasicBlock::iterator End = std::next(Last);
+ for (MachineBasicBlock::iterator I = First; I != End; ++I) {
+ MachineInstr *MI = &*I;
+ // Do not touch instructions that are not predicated, or are predicated
+ // on the opposite condition.
+ if (!HII->isPredicated(MI))
+ continue;
+ if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || RO != RegisterRef(Op))
+ continue;
+ Op.setReg(RN.Reg);
+ Op.setSubReg(RN.Sub);
+ // In practice, this isn't supposed to see any defs.
+ assert(!Op.isDef() && "Not expecting a def");
+ }
+ }
+}
+
+
+/// For a given conditional copy, predicate the definition of the source of
+/// the copy under the given condition (using the same predicate register as
+/// the copy).
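+/// For example (with hypothetical registers, and assuming a predicated form
+/// of the add such as A2_paddt is available):
+///   %vreg2 = A2_add %vreg3, %vreg4
+///   ...
+///   %vreg1 = A2_tfrt %vreg0, %vreg2<kill>
+/// becomes
+///   %vreg1 = A2_paddt %vreg0, %vreg3, %vreg4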
+bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
+ // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
+ unsigned Opc = TfrI->getOpcode();
+ (void)Opc;
+ assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
+ DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << *TfrI);
+
+ MachineOperand &MD = TfrI->getOperand(0);
+ MachineOperand &MP = TfrI->getOperand(1);
+ MachineOperand &MS = TfrI->getOperand(2);
+ // The source operand should be a <kill>. This is not strictly necessary,
+ // but it makes things a lot simpler. Otherwise, we would need to rename
+ // some registers, which would complicate the transformation considerably.
+ if (!MS.isKill())
+ return false;
+
+ RegisterRef RT(MS);
+ unsigned PredR = MP.getReg();
+ MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
+ if (!DefI || !isPredicable(DefI))
+ return false;
+
+ DEBUG(dbgs() << "Source def: " << *DefI);
+
+ // Collect the information about registers defined and used between the
+ // DefI and the TfrI.
+ // Map: reg -> bitmask of subregs
+ ReferenceMap Uses, Defs;
+ MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
+
+ // Check if the predicate register is valid between DefI and TfrI.
+ // If it is, we can then ignore instructions predicated on the negated
+ // conditions when collecting def and use information.
+ bool PredValid = true;
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ if (!I->modifiesRegister(PredR, nullptr))
+ continue;
+ PredValid = false;
+ break;
+ }
+
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ MachineInstr *MI = &*I;
+ // If this instruction is predicated on the same register, it could
+ // potentially be ignored.
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executes when TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ unsigned Opc = I->getOpcode();
+ if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
+ bool Done = predicate(I, (Opc == Hexagon::A2_tfrt));
+ if (!Done) {
+ // If we didn't predicate I, we may need to remove it in case it is
+ // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1.
+ if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2)))
+ removeInstrFromLiveness(I);
+ }
+ Changed |= Done;
+ }
+ }
+ return Changed;
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) {
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit())
+ MI->RemoveOperand(i-1);
+ }
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) {
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (HII->isPredicated(MI))
+ removeImplicitUses(MI);
+ }
+}
+
+
+void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) {
+ // Implicit uses that are "undef" are only meaningful (outside of the
+ // internals of this pass) when the instruction defines a subregister,
+ // and the implicit-undef use applies to the defined register. In such
+ // cases, the proper way to record the information in the IR is to mark
+ // the definition as "undef", which will be interpreted as "read-undef".
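+ // For example (hypothetical registers), the operand pair
+ //   %vreg1:loreg<def> = ..., %vreg1<imp-use,undef>
+ // is rewritten as
+ //   %vreg1:loreg<def,read-undef> = ...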
+ typedef SmallSet<unsigned,2> RegisterSet;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ RegisterSet Undefs;
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) {
+ MI->RemoveOperand(i-1);
+ Undefs.insert(MO.getReg());
+ }
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || !Op.getSubReg())
+ continue;
+ if (Undefs.count(Op.getReg()))
+ Op.setIsUndef(true);
+ }
+ }
+}
+
+
+bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BW = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ BW = (RR.Sub != 0) ? 32 : 64;
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &LR = *I;
+ // Range must start at a register...
+ if (!LR.start.isRegister())
+ return false;
+ // ...and end in a register or in a dead slot.
+ if (!LR.end.isRegister() && !LR.end.isDead())
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
+ if (CoaLimitActive) {
+ if (CoaCounter >= CoaLimit)
+ return false;
+ CoaCounter++;
+ }
+ unsigned BW1, BW2;
+ if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
+ return false;
+ if (MRI->isLiveIn(R1.Reg))
+ return false;
+ if (MRI->isLiveIn(R2.Reg))
+ return false;
+
+ LiveInterval &L1 = LIS->getInterval(R1.Reg);
+ LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ bool Overlap = L1.overlaps(L2);
+
+ DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ if (R1.Sub || R2.Sub)
+ return false;
+ if (Overlap)
+ return false;
+
+ // Coalescing could have a negative impact on scheduling, so try to limit
+ // to some reasonable extent. Only consider coalescing segments when one
+ // of them does not cross basic block boundaries.
+ if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
+ return false;
+
+ MRI->replaceRegWith(R2.Reg, R1.Reg);
+
+ // Move all live segments from L2 to L1.
+ typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap;
+ ValueInfoMap VM;
+ for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
+ VNInfo *NewVN, *OldVN = I->valno;
+ ValueInfoMap::iterator F = VM.find(OldVN);
+ if (F == VM.end()) {
+ NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ VM.insert(std::make_pair(OldVN, NewVN));
+ } else {
+ NewVN = F->second;
+ }
+ L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ }
+ while (L2.begin() != L2.end())
+ L2.removeSegment(*L2.begin());
+
+ updateKillFlags(R1.Reg, L1);
+ DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ L1.verify();
+
+ return true;
+}
+
+
+/// Attempt to coalesce one of the source registers of a MUX instruction with
+/// the destination register. This could lead to having only one predicated
+/// instruction in the end instead of two.
+bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
+ SmallVector<MachineInstr*,16> Condsets;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &B = *I;
+ for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
+ MachineInstr *MI = &*J;
+ if (!isCondset(MI))
+ continue;
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ Condsets.push_back(MI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
+ MachineInstr *CI = Condsets[i];
+ RegisterRef RD = CI->getOperand(0);
+ RegisterRef RP = CI->getOperand(1);
+ MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
+ bool Done = false;
+ // Consider this case:
+ // vreg1 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = C2_mux ..., vreg1, vreg2
+ // If vreg0 was coalesced with vreg1, we could end up with the following
+ // code:
+ // vreg0 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = A2_tfrf ..., vreg2
+ // which will later become:
+ // vreg0 = instr1 ...
+ // vreg0 = instr2_cNotPt ...
+ // i.e. there will be an unconditional definition (instr1) of vreg0
+ // followed by a conditional one. The output dependency was there before
+ // and is unavoidable, but if instr1 is predicable, we will no longer be
+ // able to predicate it here.
+ // To avoid this scenario, don't coalesce the destination register with
+ // a source register that is defined by a predicable instruction.
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+ // This is done mainly to make the live interval update simpler than it
+ // would be while trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index a2a847c47a20..40059fb27371 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -20,7 +20,6 @@
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -49,13 +48,9 @@ namespace llvm {
namespace {
class HexagonExpandPredSpillCode : public MachineFunctionPass {
- const HexagonTargetMachine& QTM;
- const HexagonSubtarget &QST;
-
public:
static char ID;
- HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ HexagonExpandPredSpillCode() : MachineFunctionPass(ID) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeHexagonExpandPredSpillCodePass(Registry);
}
@@ -72,7 +67,8 @@ char HexagonExpandPredSpillCode::ID = 0;
bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
- const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+ const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>();
+ const HexagonInstrInfo *TII = QST.getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -83,20 +79,177 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
++MII) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::STriw_pred) {
+ if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+ Opc == Hexagon::S2_storerh_pci_pseudo ||
+ Opc == Hexagon::S2_storeri_pci_pseudo ||
+ Opc == Hexagon::S2_storerd_pci_pseudo ||
+ Opc == Hexagon::S2_storerf_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pci_pseudo)
+ Opcode = Hexagon::S2_storerd_pci;
+ else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+ Opcode = Hexagon::S2_storeri_pci;
+ else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+ Opcode = Hexagon::S2_storerh_pci;
+ else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+ Opcode = Hexagon::S2_storerf_pci;
+ else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+ Opcode = Hexagon::S2_storerb_pci;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ MachineOperand &Op4 = MI->getOperand(4);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo circular store with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op4);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+ Opc == Hexagon::L2_loadri_pci_pseudo ||
+ Opc == Hexagon::L2_loadrh_pci_pseudo ||
+ Opc == Hexagon::L2_loadruh_pci_pseudo||
+ Opc == Hexagon::L2_loadrb_pci_pseudo ||
+ Opc == Hexagon::L2_loadrub_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+ Opcode = Hexagon::L2_loadrd_pci;
+ else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+ Opcode = Hexagon::L2_loadri_pci;
+ else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+ Opcode = Hexagon::L2_loadrh_pci;
+ else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+ Opcode = Hexagon::L2_loadruh_pci;
+ else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+ Opcode = Hexagon::L2_loadrb_pci;
+ else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+ Opcode = Hexagon::L2_loadrub_pci;
+ else
+ llvm_unreachable("wrong Opc");
+
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ MachineOperand &Op5 = MI->getOperand(5);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo circular load with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(Op5);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+ Opc == Hexagon::L2_loadri_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+ Opc == Hexagon::L2_loadruh_pbr_pseudo||
+ Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrd_pbr;
+ else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+ Opcode = Hexagon::L2_loadri_pbr;
+ else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrh_pbr;
+ else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadruh_pbr;
+ else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrb_pbr;
+ else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrub_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo brev load with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+ Opc == Hexagon::S2_storeri_pbr_pseudo ||
+ Opc == Hexagon::S2_storerh_pbr_pseudo ||
+ Opc == Hexagon::S2_storerb_pbr_pseudo ||
+ Opc == Hexagon::S2_storerf_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+ Opcode = Hexagon::S2_storerd_pbr;
+ else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+ Opcode = Hexagon::S2_storeri_pbr;
+ else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+ Opcode = Hexagon::S2_storerh_pbr;
+ else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+ Opcode = Hexagon::S2_storerf_pbr;
+ else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+ Opcode = Hexagon::S2_storerb_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo brev store with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
- assert(
- FP ==
- QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(1).isImm() && "Not an offset");
int Offset = MI->getOperand(1).getImm();
int SrcReg = MI->getOperand(2).getReg();
assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
"Not a predicate register");
if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) {
- if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
@@ -110,7 +263,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0).addReg(HEXAGON_RESERVED_REG_2);
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
@@ -135,14 +288,12 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
"Not a predicate register");
unsigned FP = MI->getOperand(1).getReg();
- assert(
- FP ==
- QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(2).isImm() && "Not an offset");
int Offset = MI->getOperand(2).getImm();
if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) {
- if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
@@ -157,7 +308,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
DstReg).addReg(HEXAGON_RESERVED_REG_2);
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
HEXAGON_RESERVED_REG_2)
@@ -200,6 +351,6 @@ void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) {
}
FunctionPass*
-llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) {
- return new HexagonExpandPredSpillCode(TM);
+llvm::createHexagonExpandPredSpillCode() {
+ return new HexagonExpandPredSpillCode();
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index e8d8f1497bdb..3d786a92b9e5 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -6,9 +6,8 @@
// License. See LICENSE.TXT for details.
//
// The loop start address in the LOOPn instruction is encoded as a distance
-// from the LOOPn instruction itself. If the start address is too far from
-// the LOOPn instruction, the loop needs to be set up manually, i.e. via
-// direct transfers to SAn and LCn.
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the instruction needs to use a constant extender.
// This pass will identify and convert such LOOPn instructions to a proper
// form.
//===----------------------------------------------------------------------===//
@@ -21,12 +20,15 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/PassSupport.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+static cl::opt<unsigned> MaxLoopRange(
+ "hexagon-loop-range", cl::Hidden, cl::init(200),
+ cl::desc("Restrict range of loopN instructions (testing only)"));
+
namespace llvm {
void initializeHexagonFixupHwLoopsPass(PassRegistry&);
}
@@ -52,20 +54,15 @@ namespace {
}
private:
- /// \brief Maximum distance between the loop instr and the basic block.
- /// Just an estimate.
- static const unsigned MAX_LOOP_DISTANCE = 200;
-
/// \brief Check the offset between each loop instruction and
/// the loop basic block to determine if we can use the LOOP instruction
/// or if we need to set the LC/SA registers explicitly.
bool fixupLoopInstrs(MachineFunction &MF);
- /// \brief Add the instruction to set the LC and SA registers explicitly.
- void convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS);
-
+ /// \brief Replace loop instruction with the constant extended
+ /// version if the loop label is too far from the loop instruction.
+ void useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII);
};
char HexagonFixupHwLoops::ID = 0;
@@ -78,20 +75,18 @@ FunctionPass *llvm::createHexagonFixupHwLoops() {
return new HexagonFixupHwLoops();
}
-
/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::J2_loop0r ||
- MI->getOpcode() == Hexagon::J2_loop0i;
+ MI->getOpcode() == Hexagon::J2_loop0i ||
+ MI->getOpcode() == Hexagon::J2_loop1r ||
+ MI->getOpcode() == Hexagon::J2_loop1i;
}
-
bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
- bool Changed = fixupLoopInstrs(MF);
- return Changed;
+ return fixupLoopInstrs(MF);
}
-
/// \brief For Hexagon, if the loop label is too far from the
/// loop instruction then we need to set the LC0 and SA0 registers
/// explicitly instead of using LOOP(start,count). This function
@@ -105,41 +100,49 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
// Offset of the current instruction from the start.
unsigned InstOffset = 0;
// Map each basic block to the offset of its first instruction.
- DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+ DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset;
+
+ const HexagonInstrInfo *HII =
+ static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
// First pass - compute the offset of each basic block.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- BlockToInstOffset[MBB] = InstOffset;
- InstOffset += (MBB->size() * 4);
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.getAlignment()) {
+ // Although we don't know the exact layout of the final code, we need
+ // to account for alignment padding somehow. This heuristic pads each
+ // aligned basic block according to the alignment value.
+ int ByteAlign = (1u << MBB.getAlignment()) - 1;
+ InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
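+ // E.g. with an alignment of 2 (1u<<2 = 4 bytes), offset 6 rounds up to 8.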
+ }
+
+ BlockToInstOffset[&MBB] = InstOffset;
+ for (const MachineInstr &MI : MBB)
+ InstOffset += HII->getSize(&MI);
}
- // Second pass - check each loop instruction to see if it needs to
- // be converted.
+ // Second pass - check each loop instruction to see if it needs to be
+ // converted.
InstOffset = 0;
bool Changed = false;
- RegScavenger RS;
-
- // Loop over all the basic blocks.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- InstOffset = BlockToInstOffset[MBB];
- RS.enterBasicBlock(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ InstOffset = BlockToInstOffset[&MBB];
// Loop over all the instructions.
- MachineBasicBlock::iterator MIE = MBB->end();
- MachineBasicBlock::iterator MII = MBB->begin();
+ MachineBasicBlock::iterator MII = MBB.begin();
+ MachineBasicBlock::iterator MIE = MBB.end();
while (MII != MIE) {
+ InstOffset += HII->getSize(&*MII);
+ if (MII->isDebugValue()) {
+ ++MII;
+ continue;
+ }
if (isHardwareLoop(MII)) {
- RS.forward(MII);
assert(MII->getOperand(0).isMBB() &&
"Expect a basic block as loop operand");
- int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- unsigned Dist = Sub > 0 ? Sub : -Sub;
- if (Dist > MAX_LOOP_DISTANCE) {
- // Convert to explicity setting LC0 and SA0.
- convertLoopInstr(MF, MII, RS);
- MII = MBB->erase(MII);
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ if ((unsigned)abs(diff) > MaxLoopRange) {
+ useExtLoopInstr(MF, MII);
+ MII = MBB.erase(MII);
Changed = true;
} else {
++MII;
@@ -147,39 +150,38 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
} else {
++MII;
}
- InstOffset += 4;
}
}
return Changed;
}
-
-/// \brief convert a loop instruction to a sequence of instructions that
-/// set the LC0 and SA0 register explicitly.
-void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS) {
+/// \brief Replace loop instructions with the constant extended version.
+void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
-
- // First, set the LC0 with the trip count.
- if (MII->getOperand(1).isReg()) {
- // Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
- .addReg(MII->getOperand(1).getReg());
- } else {
- // Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch)
- .addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
- .addReg(Scratch);
+ MachineInstrBuilder MIB;
+ unsigned newOp;
+ switch (MII->getOpcode()) {
+ case Hexagon::J2_loop0r:
+ newOp = Hexagon::J2_loop0rext;
+ break;
+ case Hexagon::J2_loop0i:
+ newOp = Hexagon::J2_loop0iext;
+ break;
+ case Hexagon::J2_loop1r:
+ newOp = Hexagon::J2_loop1rext;
+ break;
+ case Hexagon::J2_loop1i:
+ newOp = Hexagon::J2_loop1iext;
+ break;
+ default:
+ llvm_unreachable("Invalid Hardware Loop Instruction.");
}
- // Then, set the SA0 with the loop start address.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0)
- .addReg(Scratch);
+ MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
+
+ for (unsigned i = 0; i < MII->getNumOperands(); ++i)
+ MIB.addOperand(MII->getOperand(i));
}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 9d1a527eddb3..0885a794a7b4 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,6 +8,8 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-pei"
+
#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
@@ -16,334 +18,1274 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+// Hexagon stack frame layout as defined by the ABI:
+//
+//                                                        Incoming arguments
+//                                                        passed via stack
+//                                                                       |
+//                                                                       |
+//          SP during function's                 FP during function's    |
+//      +-- runtime (top of stack)   runtime (bottom) --+                |
+//      |                                               |                |
+// --++---------------------+------------------+-----------------++-+-------
+//    |  parameter area for |  variable-size   |   fixed-size    |LR|  arg
+//    |   called functions  |  local objects   |  local objects  |FP|
+// --+----------------------+------------------+-----------------+--+-------
+//      <-   size known    -> <- size unknown -> <- size known  ->
+//
+// Low address                                                  High address
+//
+//                                                <--- stack growth
+//
+//
+// - Under all circumstances, the outgoing function arguments are always
+//   accessible using the SP, and the incoming arguments are accessible
+//   using the FP.
+// - If the local objects are not aligned, they can always be accessed using
+//   the FP.
+// - If there are no variable-sized objects, the local objects can always be
+//   accessed using the SP, regardless of whether they are aligned or not.
+//   (The alignment padding will be at the bottom of the stack (highest
+//   address), and so the offset with respect to the SP will be known at
+//   compile time.)
+//
+// The only complication occurs if there are both aligned local objects and
+// dynamically allocated (variable-sized) objects. The alignment pad will be
+// placed between the FP and the local objects, thus preventing the use of the
+// FP to access the local objects. At the same time, the variable-sized objects
+// will be between the SP and the local objects, thus introducing an unknown
+// distance from the SP to the locals.
+//
+// To avoid this problem, a new register is created that holds the aligned
+// address of the bottom of the stack, referred to in the sources as AP (aligned
+// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
+// that aligns AP to the required boundary (a maximum of the alignments of
+// all stack objects, fixed- and variable-sized). All local objects[1] will
+// then use AP as the base pointer.
+// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
+// their name from being allocated at fixed locations on the stack, relative
+// to the FP. In the presence of dynamic allocation and local alignment, such
+// objects can only be accessed through the FP.
+//
+// Illustration of the AP:
+//                                                                  FP --+
+//                                                                       |
+// ---------------+---------------------+-----+-----------------------++-+--
+//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
+//   stack frame  |      (aligned)      |     |  (CSR, spills, etc.)  |FP|
+// ---------------+---------------------+-----+-----------------+-----+--+--
+//                                            |<-- Multiple of the -->|
+//                                                stack alignment     +-- AP
+//
+// The AP is set up at the beginning of the function. Since it is not a dedi-
+// cated (reserved) register, it needs to be kept live throughout the function
+// to be available as the base register for local object accesses.
+// Normally, the address of a stack object is obtained by a pseudo-instruction
+// TFR_FI. To access local objects with the AP register present, a different
+// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
+// argument compared to TFR_FI: the first input register is the AP register.
+// This keeps the register live between its definition and its uses.
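+//
+// An illustrative sketch (the operand order here is schematic, not a
+// normative MI syntax):
+//    Rd = TFR_FI fi#2, 0          ; address of fi#2, based on the FP/SP
+//    Rd = TFR_FIA Rap, fi#2, 0    ; same address, based on the AP (in Rap)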
+
+// The AP register is originally set up using pseudo-instruction ALIGNA:
+// AP = ALIGNA A
+// where
+// A - required stack alignment
+// The alignment value must be the maximum of all alignments required by
+// any stack object.
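+// For example, if the most-aligned stack object requires a 32-byte
+// boundary, the function entry would contain (schematically):
+//    AP = ALIGNA 32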
+
+// The dynamic allocation uses a pseudo-instruction ALLOCA:
+// Rd = ALLOCA Rs, A
+// where
+// Rd - address of the allocated space
+// Rs - minimum size (the actual allocated size can be larger to accommodate
+// alignment)
+// A - required alignment
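+//
+// As an illustrative sketch, a source-level "p = alloca(n)" with a 32-byte
+// alignment requirement would be selected (schematically) as:
+//    Rd = ALLOCA Rs, 32          ; Rs holds n
+// and is later expanded by expandAlloca (defined further below) into
+// sub/and/add arithmetic on r29.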
+
+
using namespace llvm;
-static cl::opt<bool> DisableDeallocRet(
- "disable-hexagon-dealloc-ret",
- cl::Hidden,
- cl::desc("Disable Dealloc Return for Hexagon target"));
+static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
+ cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- // Get the number of bytes to allocate from the FrameInfo.
- unsigned FrameSize = MFI->getStackSize();
+static cl::opt<int> NumberScavengerSlots("number-scavenger-slots",
+ cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
+ cl::ZeroOrMore);
- // Get the alignments provided by the target.
- unsigned TargetAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
+ cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
+ cl::init(6), cl::ZeroOrMore);
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
+ cl::Hidden, cl::desc("Specify Os spill func threshold"),
+ cl::init(1), cl::ZeroOrMore);
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable stack frame shrink wrapping"));
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
+static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
+ "shrink-wraps"));
- // Make sure the frame is aligned.
- FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+namespace {
+ /// Map a register pair Reg to the subregister that has the greater "number",
+ /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
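+  /// With \p hireg set to false, the subregister with the smaller "number"
+  /// is returned instead, i.e. R6 for D3.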
+ unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
+ bool hireg = true) {
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
+ return Reg;
- // Update frame info.
- MFI->setStackSize(FrameSize);
+ unsigned RegNo = 0;
+ for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
+ if (hireg) {
+ if (*SubRegs > RegNo)
+ RegNo = *SubRegs;
+ } else {
+ if (!RegNo || *SubRegs < RegNo)
+ RegNo = *SubRegs;
+ }
+ }
+ return RegNo;
+ }
+
+  /// Returns the callee-saved register with the largest id in the vector.
+ unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo &TRI) {
+ assert(Hexagon::R1 > 0 &&
+ "Assume physical registers are encoded as positive integers");
+ if (CSI.empty())
+ return 0;
+
+ unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
+ for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
+ unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
+ if (Reg > Max)
+ Max = Reg;
+ }
+ return Max;
+ }
+
+ /// Checks if the basic block contains any instruction that needs a stack
+ /// frame to be already in place.
+ bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) {
+ for (auto &I : MBB) {
+ const MachineInstr *MI = &I;
+ if (MI->isCall())
+ return true;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::ALLOCA:
+ case Hexagon::ALIGNA:
+ return true;
+ default:
+ break;
+ }
+ // Check individual operands.
+ for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ // While the presence of a frame index does not prove that a stack
+ // frame will be required, all frame indexes should be within alloc-
+ // frame/deallocframe. Otherwise, the code that translates a frame
+ // index into an offset would have to be aware of the placement of
+ // the frame creation/destruction instructions.
+ if (Mo->isFI())
+ return true;
+ if (!Mo->isReg())
+ continue;
+ unsigned R = Mo->getReg();
+ // Virtual registers will need scavenging, which then may require
+ // a stack slot.
+ if (TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ if (CSR[R])
+ return true;
+ }
+ }
+ return false;
+ }
+
+  /// Returns true if MBB has a machine instruction that indicates a tail
+  /// call in the block.
+ bool hasTailCall(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ unsigned RetOpc = I->getOpcode();
+ return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
+ }
+
+ /// Returns true if MBB contains an instruction that returns.
+ bool hasReturn(const MachineBasicBlock &MBB) {
+ for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
+ if (I->isReturn())
+ return true;
+ return false;
+ }
+}
+
+
+/// Implements shrink-wrapping of the stack frame. By default, the stack
+/// frame is created in the function entry block, and is cleaned up in every
+/// block that returns. This function finds alternate blocks: one for the
+/// frame setup (prolog) and one for the cleanup (epilog).
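+/// For example, if only a single block needs the stack frame, DomB and
+/// PDomB can both end up being that very block, and both the prolog and
+/// the epilog are then placed into it.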
+void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
+ MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
+ static unsigned ShrinkCounter = 0;
+
+ if (ShrinkLimit.getPosition()) {
+ if (ShrinkCounter >= ShrinkLimit)
+ return;
+ ShrinkCounter++;
+ }
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ MachineDominatorTree MDT;
+ MDT.runOnMachineFunction(MF);
+ MachinePostDominatorTree MPT;
+ MPT.runOnMachineFunction(MF);
+
+ typedef DenseMap<unsigned,unsigned> UnsignedMap;
+ UnsignedMap RPO;
+ typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
+ RPOTType RPOT(&MF);
+ unsigned RPON = 0;
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ RPO[(*I)->getNumber()] = RPON++;
+
+  // Don't process functions that have loops, at least for now. Placement
+  // of the prolog and epilog must take loop structure into account. For
+  // simplicity, don't do it right now.
+ for (auto &I : MF) {
+ unsigned BN = RPO[I.getNumber()];
+ for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
+      // If a back-edge is found, return.
+ if (RPO[(*SI)->getNumber()] <= BN)
+ return;
+ }
+ }
+
+ // Collect the set of blocks that need a stack frame to execute. Scan
+ // each block for uses/defs of callee-saved registers, calls, etc.
+ SmallVector<MachineBasicBlock*,16> SFBlocks;
+ BitVector CSR(Hexagon::NUM_TARGET_REGS);
+ for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
+ CSR[*P] = true;
+
+ for (auto &I : MF)
+ if (needsStackFrame(I, CSR))
+ SFBlocks.push_back(&I);
+
+ DEBUG({
+ dbgs() << "Blocks needing SF: {";
+ for (auto &B : SFBlocks)
+ dbgs() << " BB#" << B->getNumber();
+ dbgs() << " }\n";
+ });
+ // No frame needed?
+ if (SFBlocks.empty())
+ return;
+
+ // Pick a common dominator and a common post-dominator.
+ MachineBasicBlock *DomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
+ if (!DomB)
+ break;
+ }
+ MachineBasicBlock *PDomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
+ if (!PDomB)
+ break;
+ }
+ DEBUG({
+ dbgs() << "Computed dom block: BB#";
+ if (DomB) dbgs() << DomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << ", computed pdom block: BB#";
+ if (PDomB) dbgs() << PDomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << "\n";
+ });
+ if (!DomB || !PDomB)
+ return;
+
+ // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
+ if (!MDT.dominates(DomB, PDomB)) {
+ DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
+ return;
+ }
+ if (!MPT.dominates(PDomB, DomB)) {
+ DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
+ return;
+ }
+
+ // Finally, everything seems right.
+ PrologB = DomB;
+ EpilogB = PDomB;
+}
+
+/// Perform most of the PEI work here:
+/// - saving/restoring of the callee-saved registers,
+/// - stack frame creation and destruction.
+/// Normally, this work is distributed among various functions, but doing it
+/// in one place allows shrink-wrapping of the stack frame.
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
+ if (EnableShrinkWrapping)
+ findShrunkPrologEpilog(MF, PrologB, EpilogB);
+
+ insertCSRSpillsInBlock(*PrologB, CSI, HRI);
+ insertPrologueInBlock(*PrologB);
+
+ if (EpilogB) {
+ insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
+ insertEpilogueInBlock(*EpilogB);
+ } else {
+ for (auto &B : MF)
+ if (!B.empty() && B.back().isReturn())
+ insertCSRRestoresInBlock(B, CSI, HRI);
+
+ for (auto &B : MF)
+ if (!B.empty() && B.back().isReturn())
+ insertEpilogueInBlock(B);
+ }
}
-void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
MachineBasicBlock::iterator MBBI = MBB.begin();
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- determineFrameLayout(MF);
+ auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ DebugLoc dl;
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
+
+ // Calculate the total stack frame size.
// Get the number of bytes to allocate from the FrameInfo.
- int NumBytes = (int) MFI->getStackSize();
+ unsigned FrameSize = MFI->getStackSize();
+ // Round up the max call frame size to the max alignment on the stack.
+ unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign);
+ MFI->setMaxCallFrameSize(MaxCFA);
+
+ FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign);
+ MFI->setStackSize(FrameSize);
- // LLVM expects allocframe not to be the first instruction in the
- // basic block.
+ bool AlignStack = (MaxAlign > getStackAlignment());
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned NumBytes = MFI->getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned MaxCF = MFI->getMaxCallFrameSize();
MachineBasicBlock::iterator InsertPt = MBB.begin();
- //
- // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
- //
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
- const std::vector<MachineInstr*>& AdjustRegs =
- FuncInfo->getAllocaAdjustInsts();
- for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
- e = AdjustRegs.end();
- i != e; ++i) {
- MachineInstr* MI = *i;
- assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
- "Expected adjust alloca node");
+ auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
- MachineOperand& MO = MI->getOperand(2);
- assert(MO.isImm() && "Expected immediate");
- MO.setImm(MFI->getMaxCallFrameSize());
+ for (auto MI : AdjustRegs) {
+ assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
+ expandAlloca(MI, HII, SP, MaxCF);
+ MI->eraseFromParent();
}
//
- // Only insert ALLOCFRAME if we need to.
+  // Only insert ALLOCFRAME if we need to, or at -O0 for the debugger. We
+  // think that this shouldn't be required, but we do it now because gcc
+  // does and gdb can't break at the start of the function without it. Will
+  // remove this if it turns out to be a gdb bug.
//
- if (hasFP(MF)) {
- // Check for overflow.
- // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
- const int ALLOCFRAME_MAX = 16384;
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-
- if (NumBytes >= ALLOCFRAME_MAX) {
- // Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(0);
-
- // Subtract offset from frame pointer.
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
- HEXAGON_RESERVED_REG_1).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::A2_sub),
- QRI->getStackRegister()).
- addReg(QRI->getStackRegister()).
- addReg(HEXAGON_RESERVED_REG_1);
- } else {
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(NumBytes);
- }
+ bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF))
+ return;
+
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const unsigned int ALLOCFRAME_MAX = 16384;
+
+  // Create a dummy memory operand to prevent allocframe from being treated
+  // as a volatile memory reference.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ 4, 4);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+ // Subtract offset from frame pointer.
+ // We use a caller-saved non-parameter register for that.
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
+ CallerSavedReg).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(CallerSavedReg);
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
}
-}
-// Returns true if MBB has a machine instructions that indicates a tail call
-// in the block.
-bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+
+ if (needsFrameMoves) {
+ std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions();
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
+ unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
+
+ // CFA = FP + 8
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ FrameLabel, DwFPReg, -8));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // R31 (return addr) = CFA - #4
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ FrameLabel, DwRAReg, -4));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+    // R30 (frame ptr) = CFA - #8
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ FrameLabel, DwFPReg, -8));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ unsigned int regsToMove[] = {
+ Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
+ Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
+ Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
+ Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
+ Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10,
+ Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister
+ };
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) {
+ for (unsigned I = 0, E = CSI.size(); I < E; ++I) {
+ if (CSI[I].getReg() == regsToMove[i]) {
+ // Subtract 8 to make room for R30 and R31, which are added above.
+ int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8;
+
+ if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) {
+ unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ DwarfReg, Offset));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else {
+ // Split the double regs into subregs, and generate appropriate
+ // cfi_offsets.
+          // The only reason we split the double regs is that llvm-mc does
+          // not understand paired registers for cfi_offset.
+          // E.g. .cfi_offset r1:0, -64
+ unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI);
+ unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false);
+ unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+ unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+ unsigned HiCFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ HiDwarfReg, Offset+4));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(HiCFIIndex);
+ unsigned LoCFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ LoDwarfReg, Offset));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(LoCFIIndex);
+ }
+ break;
+ }
+ } // for CSI.size()
+ } // for regsToMove
+ } // needsFrameMoves
}
-void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
- DebugLoc dl = MBBI->getDebugLoc();
+void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
//
// Only insert deallocframe if we need to. Also at -O0. See comment
- // in emitPrologue above.
+ // in insertPrologueInBlock above.
//
- if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) {
- MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
- MachineBasicBlock::iterator MBBI_end = MBB.end();
-
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- // Handle EH_RETURN.
- if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
- assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add),
- Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
- return;
- }
- // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
- // versions.
- if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() &&
- MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) {
- // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
- // instruction if we encounter it.
- MachineBasicBlock::iterator BeforeJMPR =
- MBB.begin() == MBBI ? MBBI : std::prev(MBBI);
- if (BeforeJMPR != MBBI &&
- BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
- // Remove the JMPR node.
- MBB.erase(MBBI);
- return;
- }
+ if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None)
+ return;
- // Add dealloc_return.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return));
- // Transfer the function live-out registers.
- MIB->copyImplicitOps(*MBB.getParent(), &*MBBI);
- // Remove the JUMPR node.
- MBB.erase(MBBI);
- } else { // Add deallocframe for V2 and V3, and V4 tail calls.
- // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra
- // DEALLOCFRAME instruction after it.
- MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
- MachineBasicBlock::iterator I =
- Term == MBB.begin() ? MBB.end() : std::prev(Term);
- if (I != MBB.end() &&
- I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
- return;
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ unsigned SP = HRI.getStackRegister();
+
+ MachineInstr *RetI = nullptr;
+ for (auto &I : MBB) {
+ if (!I.isReturn())
+ continue;
+ RetI = &I;
+ break;
+ }
+ unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (InsertPt != MBB.end())
+ DL = InsertPt->getDebugLoc();
+ else if (!MBB.empty())
+ DL = std::prev(MBB.end())->getDebugLoc();
+
+ // Handle EH_RETURN.
+ if (RetOpc == Hexagon::EH_RETURN_JMPR) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ .addReg(SP)
+ .addReg(Hexagon::R28);
+ return;
+ }
+
+ // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
+ // frame instruction if we encounter it.
+ if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
+ MachineBasicBlock::iterator It = RetI;
+ ++It;
+ // Delete all instructions after the RESTORE (except labels).
+ while (It != MBB.end()) {
+ if (!It->isLabel())
+ It = MBB.erase(It);
+ else
+ ++It;
}
+ return;
+ }
+
+ // It is possible that the restoring code is a call to a library function.
+ // All of the restore* functions include "deallocframe", so we need to make
+ // sure that we don't add an extra one.
+ bool NeedsDeallocframe = true;
+ if (!MBB.empty() && InsertPt != MBB.begin()) {
+ MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
+ unsigned COpc = PrevIt->getOpcode();
+ if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
+ NeedsDeallocframe = false;
+ }
+
+ if (!NeedsDeallocframe)
+ return;
+ // If the returning instruction is JMPret, replace it with dealloc_return,
+ // otherwise just add deallocframe. The function could be returning via a
+ // tail call.
+ if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ return;
}
+ unsigned NewOpc = Hexagon::L4_return;
+ MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ // Transfer the function live-out registers.
+ NewI->copyImplicitOps(MF, RetI);
+ MBB.erase(RetI);
}
+
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const HexagonMachineFunctionInfo *FuncInfo =
MF.getInfo<HexagonMachineFunctionInfo>();
- return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
- FuncInfo->hasClobberLR() );
+ return MFI->hasCalls() || MFI->getStackSize() > 0 ||
+ FuncInfo->hasClobberLR();
}
-static inline
-unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) {
- MCSuperRegIterator SRI(Reg, TRI);
- assert(SRI.isValid() && "Expected a superreg");
- unsigned SuperReg = *SRI;
- ++SRI;
- assert(!SRI.isValid() && "Expected exactly one superreg");
- return SuperReg;
-}
-bool
-HexagonFrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+enum SpillKind {
+ SK_ToMem,
+ SK_FromMem,
+ SK_FromMemTailcall
+};
- if (CSI.empty()) {
- return false;
+static const char *
+getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) {
+ const char * V4SpillToMemoryFunctions[] = {
+ "__save_r16_through_r17",
+ "__save_r16_through_r19",
+ "__save_r16_through_r21",
+ "__save_r16_through_r23",
+ "__save_r16_through_r25",
+ "__save_r16_through_r27" };
+
+ const char * V4SpillFromMemoryFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe",
+ "__restore_r16_through_r19_and_deallocframe",
+ "__restore_r16_through_r21_and_deallocframe",
+ "__restore_r16_through_r23_and_deallocframe",
+ "__restore_r16_through_r25_and_deallocframe",
+ "__restore_r16_through_r27_and_deallocframe" };
+
+ const char * V4SpillFromMemoryTailcallFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r19_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r21_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r23_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r25_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r27_and_deallocframe_before_tailcall"
+ };
+
+ const char **SpillFunc = nullptr;
+
+ switch(SpillType) {
+ case SK_ToMem:
+ SpillFunc = V4SpillToMemoryFunctions;
+ break;
+ case SK_FromMem:
+ SpillFunc = V4SpillFromMemoryFunctions;
+ break;
+ case SK_FromMemTailcall:
+ SpillFunc = V4SpillFromMemoryTailcallFunctions;
+ break;
+ }
+ assert(SpillFunc && "Unknown spill kind");
+
+ // Spill all callee-saved registers up to the highest register used.
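+  // For example, MaxReg == Hexagon::R21 with SpillType == SK_ToMem selects
+  // "__save_r16_through_r21".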
+ switch (MaxReg) {
+ case Hexagon::R17:
+ return SpillFunc[0];
+ case Hexagon::R19:
+ return SpillFunc[1];
+ case Hexagon::R21:
+ return SpillFunc[2];
+ case Hexagon::R23:
+ return SpillFunc[3];
+ case Hexagon::R25:
+ return SpillFunc[4];
+ case Hexagon::R27:
+ return SpillFunc[5];
+ default:
+ llvm_unreachable("Unhandled maximum callee save register");
}
+ return 0;
+}
- // We can only schedule double loads if we spill contiguous callee-saved regs
- // For instance, we cannot scheduled double-word loads if we spill r24,
- // r26, and r27.
- // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
- // above.
- bool ContiguousRegs = true;
+/// Adds all callee-saved registers up to MaxReg to the instruction.
+static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
+ unsigned MaxReg, bool IsDef) {
+  // Add the callee-saved registers as implicit uses or defs (per IsDef).
+ for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) {
+ MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true);
+ Inst->addOperand(ImpUse);
+ }
+}
- for (unsigned i = 0; i < CSI.size(); ++i) {
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
+
+
+bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineBasicBlock::iterator MI = MBB.begin();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ if (useSpillFunction(MF, CSI)) {
+ unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
+ const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem);
+ // Call spill function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ MachineInstr *SaveRegsCall =
+ BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
+ .addExternalSymbol(SpillFun);
+    // Add the callee-saved registers as uses.
+ addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
+    // Add live-in registers.
+ for (unsigned I = 0; I < CSI.size(); ++I)
+ MBB.addLiveIn(CSI[I].getReg());
+ return true;
+ }
+
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
unsigned Reg = CSI[i].getReg();
+    // Add live-in registers. We treat the eh_return callee-saved registers
+    // r0 - r3 specially. They are not really callee-saved registers, as
+    // they are not supposed to be killed.
+ bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
+ int FI = CSI[i].getFrameIdx();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+ if (IsKill)
+ MBB.addLiveIn(Reg);
+ }
+ return true;
+}
- //
- // Check if we can use a double-word store.
- //
- unsigned SuperReg = uniqueSuperReg(Reg, TRI);
- bool CanUseDblStore = false;
- const TargetRegisterClass* SuperRegClass = nullptr;
-
- if (ContiguousRegs && (i < CSI.size()-1)) {
- unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
- CanUseDblStore = (SuperRegNext == SuperReg);
- }
+bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- if (CanUseDblStore) {
- TII.storeRegToStackSlot(MBB, MI, SuperReg, true,
- CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg);
- ++i;
+ if (useRestoreFunction(MF, CSI)) {
+ bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
+ unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
+ SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
+ const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
+
+    // Call the restore function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
+ : MBB.getLastNonDebugInstr()->getDebugLoc();
+ MachineInstr *DeallocCall = nullptr;
+
+ if (HasTC) {
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+ DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
} else {
- // Cannot use a double-word store.
- ContiguousRegs = false;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
- TRI);
- MBB.addLiveIn(Reg);
+ // The block has a return.
+ MachineBasicBlock::iterator It = MBB.getFirstTerminator();
+ assert(It->isReturn() && std::next(It) == MBB.end());
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+ DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
+ // Transfer the function live-out registers.
+ DeallocCall->copyImplicitOps(MF, It);
}
+ addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
+ return true;
+ }
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ int FI = CSI[i].getFrameIdx();
+ TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
return true;
}
-bool HexagonFrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ unsigned Opc = MI.getOpcode();
+ (void)Opc; // Silence compiler warning.
+ assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
+ "Cannot handle this call frame pseudo instruction");
+ MBB.erase(I);
+}
+
+
+void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+  // If this function uses an aligned stack and also has variable-sized stack
+ // objects, then we need to map all spill slots to fixed positions, so that
+ // they can be accessed through FP. Otherwise they would have to be accessed
+ // via AP, which may not be available at the particular place in the program.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool HasAlloca = MFI->hasVarSizedObjects();
+ bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment());
+
+ if (!HasAlloca || !HasAligna)
+ return;
+
+ unsigned LFS = MFI->getLocalFrameSize();
+ int Offset = -LFS;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
+ continue;
+ int S = MFI->getObjectSize(i);
+ LFS += S;
+ Offset -= S;
+ MFI->mapLocalFrameObject(i, Offset);
+ }
+
+ MFI->setLocalFrameSize(LFS);
+ unsigned A = MFI->getLocalFrameMaxAlign();
+ assert(A <= 8 && "Unexpected local frame alignment");
+ if (A == 0)
+ MFI->setLocalFrameMaxAlign(8);
+ MFI->setUseLocalStackAllocationBlock(true);
+}
- MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+/// Returns true if there are no caller-saved registers available.
+static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
+ const HexagonRegisterInfo &HRI) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF);
+ // Check for an unused caller-saved register.
+ for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
+ MCPhysReg FreeReg = *CallerSavedRegs;
+ if (MRI.isPhysRegUsed(FreeReg))
+ continue;
- if (CSI.empty()) {
+ // Check aliased register usage.
+ bool IsCurrentRegUsed = false;
+ for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
+ if (MRI.isPhysRegUsed(*AI)) {
+ IsCurrentRegUsed = true;
+ break;
+ }
+ if (IsCurrentRegUsed)
+ continue;
+
+ // Neither directly used nor used through an aliased register.
return false;
}
+ // All caller-saved registers are used.
+ return true;
+}
- // We can only schedule double loads if we spill contiguous callee-saved regs
- // For instance, we cannot scheduled double-word loads if we spill r24,
- // r26, and r27.
- // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
- // above.
- bool ContiguousRegs = true;
- for (unsigned i = 0; i < CSI.size(); ++i) {
- unsigned Reg = CSI[i].getReg();
+/// Replaces the predicate spill code pseudo instructions with valid ones.
+bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
+ const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool HasReplacedPseudoInst = false;
+  // Replace predicate spill pseudo instructions with real code.
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ MachineBasicBlock::iterator NextII;
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ MII = NextII) {
+ MachineInstr *MI = MII;
+ NextII = std::next(MII);
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ HasReplacedPseudoInst = true;
+ // STriw_pred FI, 0, SrcReg;
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ bool IsOrigSrcRegKilled = MI->getOperand(2).isKill();
+
+ assert(MI->getOperand(0).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+
+ // Insert transfer to general purpose register.
+ // VirtReg = C2_tfrpr SrcPredReg
+ BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr),
+ VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled));
+
+ // Change instruction to S2_storeri_io.
+ // S2_storeri_io FI, 0, VirtReg
+ MI->setDesc(HII.get(Hexagon::S2_storeri_io));
+ MI->getOperand(2).setReg(VirtReg);
+ MI->getOperand(2).setIsKill();
- //
- // Check if we can use a double-word load.
- //
- unsigned SuperReg = uniqueSuperReg(Reg, TRI);
- const TargetRegisterClass* SuperRegClass = nullptr;
- bool CanUseDblLoad = false;
- if (ContiguousRegs && (i < CSI.size()-1)) {
- unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
- CanUseDblLoad = (SuperRegNext == SuperReg);
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred FI, 0
+ MachineOperand &M0 = MI->getOperand(0);
+ if (M0.isDead()) {
+ MBB->erase(MII);
+ continue;
+ }
+
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ assert(MI->getOperand(1).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(DestReg) &&
+ "Not a predicate register");
+
+ // Change instruction to L2_loadri_io.
+ // VirtReg = L2_loadri_io FI, 0
+ MI->setDesc(HII.get(Hexagon::L2_loadri_io));
+ MI->getOperand(0).setReg(VirtReg);
+
+ // Insert transfer to general purpose register.
+ // DestReg = C2_tfrrp VirtReg
+ const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp);
+ BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg)
+ .addReg(VirtReg, getKillRegState(true));
+ HasReplacedPseudoInst = true;
+ }
}
+ }
+ return HasReplacedPseudoInst;
+}
- if (CanUseDblLoad) {
- TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(),
- SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg);
- ++i;
- } else {
- // Cannot use a double-word load.
- ContiguousRegs = false;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
- MBB.addLiveIn(Reg);
+void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan(
+ MachineFunction &MF, RegScavenger* RS) const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
+
+ // If we have a function containing __builtin_eh_return we want to spill and
+ // restore all callee saved registers. Pretend that they are used.
+ if (HasEHReturn) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs;
+ ++CSRegs)
+ if (!MRI.isPhysRegUsed(*CSRegs))
+ MRI.setPhysRegUsed(*CSRegs);
+ }
+
+ const TargetRegisterClass &RC = Hexagon::IntRegsRegClass;
+
+ // Replace predicate register pseudo spill code.
+ bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF);
+
+  // We need to reserve a spill slot if scavenging could potentially require
+ // spilling a scavenged register.
+ if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ for (int i=0; i < NumberScavengerSlots; i++)
+ RS->addScavengingFrameIndex(
+ MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment()));
+ }
+}
+
+
+#ifndef NDEBUG
+static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << ' ' << PrintReg(R, &TRI);
+ }
+ dbgs() << " }";
+}
+#endif
+
+
+bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
+ << MF.getFunction()->getName() << '\n');
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector SRegs(Hexagon::NUM_TARGET_REGS);
+
+ // Generate a set of unique, callee-saved registers (SRegs), where each
+ // register in the set is maximal in terms of sub-/super-register relation,
+ // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
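+  // Illustrative example (assuming D8 = R17:16, D9 = R19:18, and none of
+  // these registers reserved): starting from CSI = {R16, R17, D9}, step (1)
+  // produces {R16, R17, R18, R19, D9}; steps (3)-(4) add the candidate
+  // super-registers D8 and D9; step (5) then drops R16-R19, leaving
+  // SRegs = {D8, D9}.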
+
+ // (1) For each callee-saved register, add that register and all of its
+ // sub-registers to SRegs.
+ DEBUG(dbgs() << "Initial CS registers: {");
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = true;
+ }
+ DEBUG(dbgs() << " }\n");
+ DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (2) For each reserved register, remove that register and all of its
+ // sub- and super-registers from SRegs.
+ BitVector Reserved = TRI->getReservedRegs(MF);
+ for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = false;
+ }
+ DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n");
+ DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (3) Collect all registers that have at least one sub-register in SRegs,
+ // and also have no sub-registers that are reserved. These will be the can-
+ // didates for saving as a whole instead of their individual sub-registers.
+ // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
+ BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
+ TmpSup[*SR] = true;
+ }
+ for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
+ unsigned R = x;
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
+ if (!Reserved[*SR])
+ continue;
+ TmpSup[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
+
+ // (4) Include all super-registers found in (3) into SRegs.
+ SRegs |= TmpSup;
+ DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (5) For each register R in SRegs, if any super-register of R is in SRegs,
+ // remove R from SRegs.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
+ if (!SRegs[*SR])
+ continue;
+ SRegs[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // Now, for each register that has a fixed stack slot, create the stack
+ // object for it.
+ CSI.clear();
+
+ typedef TargetFrameLowering::SpillSlot SpillSlot;
+ unsigned NumFixed;
+ int MinOffset = 0; // CS offsets are negative.
+ const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
+ for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
+ if (!SRegs[S->Reg])
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
+ MinOffset = std::min(MinOffset, S->Offset);
+ CSI.push_back(CalleeSavedInfo(S->Reg, FI));
+ SRegs[S->Reg] = false;
+ }
+
+ // There can be some registers that don't have fixed slots. For example,
+ // we need to store R0-R3 in functions with exception handling. For each
+ // such register, create a non-fixed stack object.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
+ int Off = MinOffset - RC->getSize();
+ unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
+ assert(isPowerOf2_32(Align));
+ Off &= -Align;
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
+ MinOffset = std::min(MinOffset, Off);
+ CSI.push_back(CalleeSavedInfo(R, FI));
+ SRegs[R] = false;
+ }
+
+ DEBUG({
+ dbgs() << "CS information: {";
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ int FI = CSI[i].getFrameIdx();
+ int Off = MFI->getObjectOffset(FI);
+ dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
+ if (Off >= 0)
+ dbgs() << '+';
+ dbgs() << Off;
}
+ dbgs() << " }\n";
+ });
+
+#ifndef NDEBUG
+ // Verify that all registers were handled.
+ bool MissedReg = false;
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << PrintReg(R, TRI) << ' ';
+ MissedReg = true;
}
+ if (MissedReg)
+ llvm_unreachable("...there are unhandled callee-saved registers!");
+#endif
+
return true;
}
-void HexagonFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
- // Hexagon_TODO: add code
- } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
- // Hexagon_TODO: add code
- } else {
- llvm_unreachable("Cannot handle this call frame pseudo instruction");
+void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
+ const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
+ MachineBasicBlock &MB = *AI->getParent();
+ DebugLoc DL = AI->getDebugLoc();
+ unsigned A = AI->getOperand(2).getImm();
+
+ // Have
+ // Rd = alloca Rs, #A
+ //
+ // If Rs and Rd are different registers, use this sequence:
+ // Rd = sub(r29, Rs)
+ // r29 = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = and(r29, #-A) ; if necessary
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
+ // otherwise, do
+ // Rd = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = Rd
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
+
+ MachineOperand &RdOp = AI->getOperand(0);
+ MachineOperand &RsOp = AI->getOperand(1);
+ unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
+
+ // Rd = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
+ .addReg(SP)
+ .addReg(Rs);
+ if (Rs != Rd) {
+ // r29 = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(Rs);
+ }
+ if (A > 8) {
+ // Rd = and(Rd, #-A)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
+ .addReg(Rd)
+ .addImm(-int64_t(A));
+ if (Rs != Rd)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(A));
+ }
+ if (Rs == Rd) {
+ // r29 = Rd
+ BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
+ .addReg(Rd);
+ }
+ if (CF > 0) {
+ // Rd = add(Rd, #CF)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
+ .addReg(Rd)
+ .addImm(CF);
}
- MBB.erase(I);
}
-int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- return MF.getFrameInfo()->getObjectOffset(FI);
+
+bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!MFI->hasVarSizedObjects())
+ return false;
+ unsigned MaxA = MFI->getMaxAlignment();
+ if (MaxA <= getStackAlignment())
+ return false;
+ return true;
+}
+
+
+MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
+ for (auto &B : MF)
+ for (auto &I : B)
+ if (I.getOpcode() == Hexagon::ALIGNA)
+ return &I;
+ return nullptr;
+}
+
+
+inline static bool isOptSize(const MachineFunction &MF) {
+ AttributeSet AF = MF.getFunction()->getAttributes();
+ return AF.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+}
+
+inline static bool isMinSize(const MachineFunction &MF) {
+ AttributeSet AF = MF.getFunction()->getAttributes();
+ return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+}
+
+
+/// Determine whether the callee-saved register saves and restores should
+/// be generated via inline code. If this function returns "true", inline
+/// code will be generated. If this function returns "false", additional
+/// checks are performed, which may still lead to the inline code.
+bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
+ return true;
+ if (!isOptSize(MF) && !isMinSize(MF))
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
+ return true;
+
+ // Check if CSI only has double registers, and if the registers form
+ // a contiguous block starting from D8.
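+  // For example, {D8, D9, D10} can be covered by a spill library function,
+  // while {D8, D10} or {D9, D10} would make this function return true
+  // (i.e. generate the saves and restores inline).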
+ BitVector Regs(Hexagon::NUM_TARGET_REGS);
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ if (!Hexagon::DoubleRegsRegClass.contains(R))
+ return true;
+ Regs[R] = true;
+ }
+ int F = Regs.find_first();
+ if (F != Hexagon::D8)
+ return true;
+ while (F >= 0) {
+ int N = Regs.find_next(F);
+ if (N >= 0 && N != F+1)
+ return true;
+ F = N;
+ }
+
+ return false;
+}
+
+
+bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ if (NumCSI <= 1)
+ return false;
+
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
+}
+
+
+bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
}
+
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 2d6b45793809..89500cb85724 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -15,35 +15,88 @@
namespace llvm {
-class HexagonFrameLowering : public TargetFrameLowering {
-private:
- void determineFrameLayout(MachineFunction &MF) const;
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
+class HexagonFrameLowering : public TargetFrameLowering {
public:
- explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {}
+ explicit HexagonFrameLowering()
+ : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {}
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ // All of the prolog/epilog functionality, including saving and restoring
+  // callee-saved registers, is handled in emitPrologue. This is to have the
+ // logic for shrink-wrapping in one place.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override {}
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
-
- void
- eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
- bool
- restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ bool targetHandlesStackFrameRounding() const override {
+ return true;
+ }
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
bool hasFP(const MachineFunction &MF) const override;
- bool hasTailCall(MachineBasicBlock &MBB) const;
+
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
+ const override {
+ static const SpillSlot Offsets[] = {
+ { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 },
+ { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 },
+ { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 },
+ { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 },
+ { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 },
+ { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 }
+ };
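+    // Note that a register pair shares its slot with its low subregister
+    // (e.g. D8 at -8 overlaps R16 at -8 and R17 at -4), so a fixed slot is
+    // available whether the pair or its individual halves are being saved.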
+ NumEntries = array_lengthof(Offsets);
+ return Offsets;
+ }
+
+ bool assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
+ const override;
+
+ bool needsAligna(const MachineFunction &MF) const;
+ MachineInstr *getAlignaInstr(MachineFunction &MF) const;
+
+private:
+ typedef std::vector<CalleeSavedInfo> CSIVect;
+
+ void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII,
+ unsigned SP, unsigned CF) const;
+ void insertPrologueInBlock(MachineBasicBlock &MBB) const;
+ void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
+ bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+ bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+
+ void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const;
+ bool replacePredRegPseudoSpillCode(MachineFunction &MF) const;
+ bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const;
+
+ void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
+ MachineBasicBlock *&EpilogB) const;
+
+  bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
+ bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
} // End llvm namespace
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 637b0a0d0bff..db72899388e5 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -21,14 +21,13 @@
// - Countable loops (w/ ind. var for a trip count)
// - Assumes loops are normalized by IndVarSimplify
// - Try inner-most loops first
-// - No nested hardware loops.
// - No function calls in loops.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallSet.h"
#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -49,9 +48,18 @@ using namespace llvm;
#define DEBUG_TYPE "hwloops"
#ifndef NDEBUG
-static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
+
+// Option to create preheader only for a specific function.
+static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
+ cl::init(""));
#endif
+// Option to create a preheader if one doesn't exist.
+static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
+ cl::Hidden, cl::init(true),
+ cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
namespace llvm {
@@ -64,9 +72,7 @@ namespace {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
MachineDominatorTree *MDT;
- const HexagonTargetMachine *TM;
const HexagonInstrInfo *TII;
- const HexagonRegisterInfo *TRI;
#ifndef NDEBUG
static int Counter;
#endif
@@ -89,14 +95,16 @@ namespace {
}
private:
+ typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
+
/// Kinds of comparisons in the compare instructions.
struct Comparison {
enum Kind {
EQ = 0x01,
NE = 0x02,
- L = 0x04, // Less-than property.
- G = 0x08, // Greater-than property.
- U = 0x40, // Unsigned property.
+ L = 0x04,
+ G = 0x08,
+ U = 0x40,
LTs = L,
LEs = L | EQ,
GTs = G,
@@ -113,6 +121,23 @@ namespace {
return (Kind)(Cmp ^ (L|G));
return Cmp;
}
+
+ static Kind getNegatedComparison(Kind Cmp) {
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)((Cmp ^ (L | G)) ^ EQ);
+ if ((Cmp & NE) || (Cmp & EQ))
+ return (Kind)(Cmp ^ (EQ | NE));
+ return (Kind)0;
+ }
+
+ static bool isSigned(Kind Cmp) {
+ return (Cmp & (L | G) && !(Cmp & U));
+ }
+
+ static bool isUnsigned(Kind Cmp) {
+ return (Cmp & U);
+ }
+
};
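Since the Kind values are bit flags, negation reduces to XOR: flipping L and G reverses the direction, and flipping EQ then completes the negation of an ordered comparison. A self-contained check of getNegatedComparison's arithmetic (the enum is abridged from the struct above; not linked against LLVM):

#include <cassert>

enum Kind { EQ = 0x01, NE = 0x02, L = 0x04, G = 0x08, U = 0x40,
            LTs = L, LEs = L | EQ, GTs = G, GEs = G | EQ };

Kind negate(Kind C) {
  if (C & (L | G))
    return Kind((C ^ (L | G)) ^ EQ);   // e.g. !(a > b)  ==  a <= b
  if (C & (EQ | NE))
    return Kind(C ^ (EQ | NE));        // e.g. !(a == b) ==  a != b
  return Kind(0);
}

int main() {
  assert(negate(GTs) == LEs);  // 0x08 -> (0x08 ^ 0x0C) ^ 0x01 == 0x05
  assert(negate(EQ) == NE);    // 0x01 ^ 0x03 == 0x02
}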
/// \brief Find the register that contains the loop controlling
@@ -130,6 +155,12 @@ namespace {
bool findInductionRegister(MachineLoop *L, unsigned &Reg,
int64_t &IVBump, MachineInstr *&IVOp) const;
+ /// \brief Return the comparison kind for the specified opcode.
+ Comparison::Kind getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *Endvalue,
+ int64_t IVBump) const;
+
/// \brief Analyze the statements in a loop to determine if the loop
/// has a computable trip count and, if so, return a value that represents
/// the trip count expression.
@@ -143,24 +174,22 @@ namespace {
/// If the trip count is not directly available (as an immediate value,
/// or a register), the function will attempt to insert computation of it
/// to the loop's preheader.
- CountValue *computeCount(MachineLoop *Loop,
- const MachineOperand *Start,
- const MachineOperand *End,
- unsigned IVReg,
- int64_t IVBump,
- Comparison::Kind Cmp) const;
+ CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
+ const MachineOperand *End, unsigned IVReg,
+ int64_t IVBump, Comparison::Kind Cmp) const;
/// \brief Return true if the instruction is not valid within a hardware
/// loop.
- bool isInvalidLoopOperation(const MachineInstr *MI) const;
+ bool isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const;
/// \brief Return true if the loop contains an instruction that inhibits
/// using the hardware loop.
- bool containsInvalidInstruction(MachineLoop *L) const;
+ bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
/// \brief Given a loop, check if we can convert it to a hardware loop.
/// If so, then perform the conversion and return true.
- bool convertToHardwareLoop(MachineLoop *L);
+ bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
/// \brief Return true if the instruction is now dead.
bool isDead(const MachineInstr *MI,
@@ -175,14 +204,44 @@ namespace {
/// defined. If the instructions are out of order, try to reorder them.
bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
- /// \brief Get the instruction that loads an immediate value into \p R,
- /// or 0 if such an instruction does not exist.
- MachineInstr *defWithImmediate(unsigned R);
+ /// \brief Return true if MO and MI pair is visited only once. If visited
+ /// more than once, this indicates there is recursion. In such a case,
+ /// return false.
+ bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the Phi may generate a value that may underflow,
+ /// or may wrap.
+ bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the induction variable may underflow an unsigned
+ /// value in the first iteration.
+ bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
+ const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Check if the given operand has a compile-time known constant
+ /// value. Return true if yes, and false otherwise. When returning true, set
+ /// Val to the corresponding constant value.
+ bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
+
+ /// \brief Check if the operand has a compile-time known constant value.
+ bool isImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ return checkForImmediate(MO, V);
+ }
- /// \brief Get the immediate value referenced to by \p MO, either for
- /// immediate operands, or for register operands, where the register
- /// was defined with an immediate value.
- int64_t getImmediate(MachineOperand &MO);
+ /// \brief Return the immediate for the specified operand.
+ int64_t getImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ if (!checkForImmediate(MO, V))
+ llvm_unreachable("Invalid operand");
+ return V;
+ }
/// \brief Reset the given machine operand to now refer to a new immediate
/// value. Assumes that the operand was already referencing an immediate
@@ -265,9 +324,7 @@ namespace {
return Contents.ImmVal;
}
- void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
- const TargetRegisterInfo *TRI =
- TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
if (isImm()) { OS << Contents.ImmVal; }
}
@@ -282,18 +339,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
"Hexagon Hardware Loops", false, false)
-
-/// \brief Returns true if the instruction is a hardware loop instruction.
-static bool isHardwareLoop(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::J2_loop0r ||
- MI->getOpcode() == Hexagon::J2_loop0i;
-}
-
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
-
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
@@ -302,22 +351,30 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
- TII = static_cast<const HexagonInstrInfo *>(
- TM->getSubtargetImpl()->getInstrInfo());
- TRI = static_cast<const HexagonRegisterInfo *>(
- TM->getSubtargetImpl()->getRegisterInfo());
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I) {
- MachineLoop *L = *I;
- if (!L->getParentLoop())
- Changed |= convertToHardwareLoop(L);
- }
+ for (auto &L : *MLI)
+ if (!L->getParentLoop()) {
+ bool L0Used = false;
+ bool L1Used = false;
+ Changed |= convertToHardwareLoop(L, L0Used, L1Used);
+ }
return Changed;
}
+/// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
+/// return the exiting block. Return 'null' when multiple exiting blocks are
+/// present.
+static MachineBasicBlock *getExitingBlock(MachineLoop *L) {
+ if (MachineBasicBlock *Latch = L->getLoopLatch()) {
+ if (L->isLoopExiting(Latch))
+ return Latch;
+ else
+ return L->getExitingBlock();
+ }
+ return nullptr;
+}
bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned &Reg,
@@ -327,7 +384,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Preheader = L->getLoopPreheader();
MachineBasicBlock *Latch = L->getLoopLatch();
- if (!Header || !Preheader || !Latch)
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!Header || !Preheader || !Latch || !ExitingBlock)
return false;
// This pair represents an induction register together with an immediate
@@ -357,15 +415,16 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned PhiOpReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
if (isAdd) {
- // If the register operand to the add is the PHI we're
- // looking at, this meets the induction pattern.
+ // If the register operand to the add is the PHI we're looking at, this
+ // meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
- if (MRI->getVRegDef(IndReg) == Phi) {
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
unsigned UpdReg = DI->getOperand(0).getReg();
- int64_t V = DI->getOperand(2).getImm();
IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
@@ -374,13 +433,13 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
SmallVector<MachineOperand,2> Cond;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
if (NotAnalyzed)
return false;
- unsigned CSz = Cond.size();
- assert (CSz == 1 || CSz == 2);
- unsigned PredR = Cond[CSz-1].getReg();
+ unsigned PredR, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
+ return false;
MachineInstr *PredI = MRI->getVRegDef(PredR);
if (!PredI->isCompare())
@@ -392,7 +451,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
CmpMask, CmpImm);
// Fail if the compare was not analyzed, or it's not comparing a register
// with an immediate value. Not checking the mask here, since we handle
- // the individual compare opcodes (including CMPb) later on.
+ // the individual compare opcodes (including A4_cmpb*) later on.
if (!CmpAnalyzed)
return false;
@@ -422,6 +481,44 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
return true;
}
+// Return the comparison kind for the specified opcode.
+HexagonHardwareLoops::Comparison::Kind
+HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *EndValue,
+ int64_t IVBump) const {
+ Comparison::Kind Cmp = (Comparison::Kind)0;
+ switch (CondOpc) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ Cmp = Comparison::EQ;
+ break;
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmpneqi:
+ Cmp = Comparison::NE;
+ break;
+ case Hexagon::C4_cmplte:
+ Cmp = Comparison::LEs;
+ break;
+ case Hexagon::C4_cmplteu:
+ Cmp = Comparison::LEu;
+ break;
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ Cmp = Comparison::GTu;
+ break;
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ Cmp = Comparison::GTs;
+ break;
+ default:
+ return (Comparison::Kind)0;
+ }
+ return Cmp;
+}
/// \brief Analyze the statements in a loop to determine if the loop has
/// a computable trip count and, if so, return a value that represents
@@ -431,7 +528,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
/// induction variable patterns that are used in the calculation for
/// the number of time the loop is executed.
CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
- SmallVectorImpl<MachineInstr *> &OldInsts) {
+ SmallVectorImpl<MachineInstr *> &OldInsts) {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
@@ -455,8 +552,8 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// Look for the cmp instruction to determine if we can get a useful trip
// count. The trip count can be either a register or an immediate. The
// location of the value depends upon the type (reg or imm).
- MachineBasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!ExitingBlock)
return nullptr;
unsigned IVReg = 0;
@@ -470,6 +567,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
MachineOperand *InitialValue = nullptr;
MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ MachineBasicBlock *Latch = L->getLoopLatch();
for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
if (MBB == Preheader)
@@ -482,7 +580,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
SmallVector<MachineOperand,2> Cond;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
if (NotAnalyzed)
return nullptr;
@@ -490,7 +588,18 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// TB must be non-null. If FB is also non-null, one of them must be
// the header. Otherwise, branch to TB could be exiting the loop, and
// the fall through can go to the header.
- assert (TB && "Latch block without a branch?");
+ assert (TB && "Exit block without a branch?");
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return nullptr;
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB: LFB;
+ }
assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
if (!TB || (FB && TB != Header && FB != Header))
return nullptr;
@@ -499,8 +608,10 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// to put imm(0), followed by P in the vector Cond.
// If TB is not the header, it means that the "not-taken" path must lead
// to the header.
- bool Negated = (Cond.size() > 1) ^ (TB != Header);
- unsigned PredReg = Cond[Cond.size()-1].getReg();
+ bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
+ unsigned PredReg, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
+ return nullptr;
MachineInstr *CondI = MRI->getVRegDef(PredReg);
unsigned CondOpc = CondI->getOpcode();
@@ -539,57 +650,13 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
if (!EndValue)
return nullptr;
- switch (CondOpc) {
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpeq:
- Cmp = !Negated ? Comparison::EQ : Comparison::NE;
- break;
- case Hexagon::C2_cmpgtui:
- case Hexagon::C2_cmpgtu:
- Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
- break;
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgt:
- Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
- break;
- // Very limited support for byte/halfword compares.
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPhEQri_V4: {
- if (IVBump != 1)
- return nullptr;
-
- int64_t InitV, EndV;
- // Since the comparisons are "ri", the EndValue should be an
- // immediate. Check it just in case.
- assert(EndValue->isImm() && "Unrecognized latch comparison");
- EndV = EndValue->getImm();
- // Allow InitialValue to be a register defined with an immediate.
- if (InitialValue->isReg()) {
- if (!defWithImmediate(InitialValue->getReg()))
- return nullptr;
- InitV = getImmediate(*InitialValue);
- } else {
- assert(InitialValue->isImm());
- InitV = InitialValue->getImm();
- }
- if (InitV >= EndV)
- return nullptr;
- if (CondOpc == Hexagon::CMPbEQri_V4) {
- if (!isInt<8>(InitV) || !isInt<8>(EndV))
- return nullptr;
- } else { // Hexagon::CMPhEQri_V4
- if (!isInt<16>(InitV) || !isInt<16>(EndV))
- return nullptr;
- }
- Cmp = !Negated ? Comparison::EQ : Comparison::NE;
- break;
- }
- default:
- return nullptr;
- }
-
+ Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
+ if (!Cmp)
+ return nullptr;
+ if (Negated)
+ Cmp = Comparison::getNegatedComparison(Cmp);
if (isSwapped)
- Cmp = Comparison::getSwappedComparison(Cmp);
+ Cmp = Comparison::getSwappedComparison(Cmp);
if (InitialValue->isReg()) {
unsigned R = InitialValue->getReg();
@@ -603,6 +670,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header))
return nullptr;
+ OldInsts.push_back(MRI->getVRegDef(R));
}
return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
@@ -626,32 +694,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If so, use the immediate value rather than the register.
if (Start->isReg()) {
const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
- if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi)
+ if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
Start = &StartValInstr->getOperand(1);
}
if (End->isReg()) {
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
- if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi)
+ if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
End = &EndValInstr->getOperand(1);
}
- assert (Start->isReg() || Start->isImm());
- assert (End->isReg() || End->isImm());
+ if (!Start->isReg() && !Start->isImm())
+ return nullptr;
+ if (!End->isReg() && !End->isImm())
+ return nullptr;
bool CmpLess = Cmp & Comparison::L;
bool CmpGreater = Cmp & Comparison::G;
bool CmpHasEqual = Cmp & Comparison::EQ;
// Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
- // If loop executes while iv is "less" with the iv value going down, then
- // the iv must wrap.
if (CmpLess && IVBump < 0)
+ // Loop going while iv is "less" with the iv value going down. Must wrap.
return nullptr;
- // If loop executes while iv is "greater" with the iv value going up, then
- // the iv must wrap.
+
if (CmpGreater && IVBump > 0)
+ // Loop going while iv is "greater" with the iv value going up. Must wrap.
return nullptr;
+ // Phis that may feed into the loop.
+ LoopFeederMap LoopFeederPhi;
+
+  // Check if the initial value may be zero and can be decremented in the
+  // first iteration. If the value is zero, the endloop instruction will not
+  // decrement the loop counter, so we shouldn't generate a hardware loop in
+  // this case.
+ if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
+ LoopFeederPhi))
+ return nullptr;
+
if (Start->isImm() && End->isImm()) {
// Both, start and end are immediates.
int64_t StartV = Start->getImm();
@@ -674,14 +755,16 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
if (CmpHasEqual)
Dist = Dist > 0 ? Dist+1 : Dist-1;
- // assert (CmpLess => Dist > 0);
- assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
- // assert (CmpGreater => Dist < 0);
- assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+ // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
+ // CmpGreater should imply Dist < 0. These conditions could actually
+ // fail, for example, in unreachable code (which may still appear to be
+ // reachable in the CFG).
+ if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
+ return nullptr;
// "Normalized" distance, i.e. with the bump set to +-1.
- int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
- : (-Dist + (-IVBump-1)) / (-IVBump);
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
+ : (-Dist + (-IVBump - 1)) / (-IVBump);
assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
uint64_t Count = Dist1;
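Dist1 is simply the ceiling of |Dist| / |IVBump|, i.e. the number of bumps needed to cover the distance. A standalone check with made-up loop bounds:

#include <cassert>
#include <cstdint>

// Ceiling division by |IVBump|, exactly as in the normalization above.
static int64_t normalize(int64_t Dist, int64_t IVBump) {
  return IVBump > 0 ? (Dist + (IVBump - 1)) / IVBump
                    : (-Dist + (-IVBump - 1)) / (-IVBump);
}

int main() {
  // for (i = 0; i < 10; i += 3) visits i = 0, 3, 6, 9: four iterations.
  assert(normalize(10, 3) == 4);
  // for (i = 10; i > 0; i -= 4) visits i = 10, 6, 2: three iterations.
  assert(normalize(-10, -4) == 3);
}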
@@ -698,14 +781,15 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
   // Otherwise we'd need a general integer division.
- if (!isPowerOf2_64(abs64(IVBump)))
+ if (!isPowerOf2_64(std::abs(IVBump)))
return nullptr;
MachineBasicBlock *PH = Loop->getLoopPreheader();
assert (PH && "Should have a preheader by now");
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
- DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
- : DebugLoc();
+ DebugLoc DL;
+ if (InsertPos != PH->end())
+ DL = InsertPos->getDebugLoc();
// If Start is an immediate and End is a register, the trip count
// will be "reg - imm". Hexagon's "subtract immediate" instruction
@@ -782,23 +866,37 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
DistSR = End->getSubReg();
} else {
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
- (RegToImm ? TII->get(Hexagon::SUB_ri) :
- TII->get(Hexagon::ADD_ri));
- unsigned SubR = MRI->createVirtualRegister(IntRC);
- MachineInstrBuilder SubIB =
- BuildMI(*PH, InsertPos, DL, SubD, SubR);
-
- if (RegToReg) {
- SubIB.addReg(End->getReg(), 0, End->getSubReg())
- .addReg(Start->getReg(), 0, Start->getSubReg());
- } else if (RegToImm) {
- SubIB.addImm(EndV)
- .addReg(Start->getReg(), 0, Start->getSubReg());
- } else { // ImmToReg
- SubIB.addReg(End->getReg(), 0, End->getSubReg())
- .addImm(-StartV);
+ (RegToImm ? TII->get(Hexagon::A2_subri) :
+ TII->get(Hexagon::A2_addi));
+ if (RegToReg || RegToImm) {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg)
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ else
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ DistR = SubR;
+ } else {
+      // If the loop has been unrolled, we should use the original loop count
+      // instead of recalculating the value. This avoids an additional
+      // 'Add' instruction.
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
+ EndValInstr->getOperand(2).getImm() == StartV) {
+ DistR = EndValInstr->getOperand(1).getReg();
+ } else {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ DistR = SubR;
+ }
}
- DistR = SubR;
DistSR = 0;
}
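The reuse case is plain algebra: when End was materialized as Start + n by an A2_addi, the distance End - Start is n itself and the add's register operand can feed the trip count directly. A schematic restatement (vreg numbers are made up):

#include <cassert>

// Schematic of the pattern match: given End's defining "add immediate",
// reuse its register operand when the immediate equals the start value.
struct AddImm { int SrcReg; int Imm; };   // %dst = A2_addi %SrcReg, #Imm

static bool reuseUnrolledCount(const AddImm &EndDef, int StartV, int &DistR) {
  if (EndDef.Imm != StartV)
    return false;   // not End = n + Start; fall back to emitting a subtract
  DistR = EndDef.SrcReg;
  return true;
}

int main() {
  AddImm EndDef = {7, 5};                 // %end = A2_addi %7, #5
  int DistR = 0;
  bool Reused = reuseUnrolledCount(EndDef, /*StartV=*/5, DistR);
  assert(Reused && DistR == 7);           // take %7 directly, no A2_subri
}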
@@ -811,7 +909,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
} else {
// Generate CountR = ADD DistR, AdjVal
unsigned AddR = MRI->createVirtualRegister(IntRC);
- const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+    const MCInstrDesc &AddD = TII->get(Hexagon::A2_addi);
BuildMI(*PH, InsertPos, DL, AddD, AddR)
.addReg(DistR, 0, DistSR)
.addImm(AdjV);
@@ -844,50 +942,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
-
/// \brief Return true if the operation is invalid within hardware loop.
-bool HexagonHardwareLoops::isInvalidLoopOperation(
- const MachineInstr *MI) const {
+bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const {
- // call is not allowed because the callee may use a hardware loop
- if (MI->getDesc().isCall())
+  // Calls are not allowed because the callee may itself use a hardware loop,
+  // except when the call never returns.
+ if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
return true;
- // do not allow nested hardware loops
- if (isHardwareLoop(MI))
- return true;
-
- // check if the instruction defines a hardware loop register
+ // Check if the instruction defines a hardware loop register.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned R = MO.getReg();
- if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
- R == Hexagon::SA0 || R == Hexagon::SA1)
+ if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
+ R == Hexagon::LC1 || R == Hexagon::SA1))
+ return true;
+ if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
return true;
}
return false;
}
-
-/// \brief - Return true if the loop contains an instruction that inhibits
-/// the use of the hardware loop function.
-bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop instruction.
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
+ bool IsInnerHWLoop) const {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI))
+ if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+        DEBUG(dbgs() << "\nCannot convert to hw_loop due to:"; MI->dump(););
return true;
+ }
}
}
return false;
}
-
/// \brief Returns true if the instruction is dead. This was essentially
/// copied from DeadMachineInstructionElim::isDead, but with special cases
/// for inline asm, physical registers and instructions with side effects
@@ -928,7 +1026,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
MachineOperand &Use = *J;
MachineInstr *UseMI = Use.getParent();
- // If the phi node has a user that is not MI, bail...
+ // If the phi node has a user that is not MI, bail.
if (MI != UseMI)
return false;
}
@@ -965,8 +1063,6 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
continue;
if (Use.isDebug())
UseMI->getOperand(0).setReg(0U);
- // This may also be a "instr -> phi -> instr" case which can
- // be removed too.
}
}
@@ -984,19 +1080,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
///
/// The code makes several assumptions about the representation of the loop
/// in llvm.
-bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
+ bool &RecL0used,
+ bool &RecL1used) {
// This is just for sanity.
assert(L->getHeader() && "Loop without a header?");
bool Changed = false;
+ bool L0Used = false;
+ bool L1Used = false;
+
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= convertToHardwareLoop(*I);
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
+ L0Used |= RecL0used;
+ L1Used |= RecL1used;
+ }
   // If a nested loop has been converted and both hardware loop register
   // sets are taken, then we can't convert this loop.
- if (Changed)
+ if (Changed && L0Used && L1Used)
return Changed;
+ unsigned LOOP_i;
+ unsigned LOOP_r;
+ unsigned ENDLOOP;
+
+ // Flag used to track loopN instruction:
+  // 1 - Hardware loop is being generated for the innermost loop.
+ // 0 - Hardware loop is being generated for the outer loop.
+ unsigned IsInnerHWLoop = 1;
+
+ if (L0Used) {
+ LOOP_i = Hexagon::J2_loop1i;
+ LOOP_r = Hexagon::J2_loop1r;
+ ENDLOOP = Hexagon::ENDLOOP1;
+ IsInnerHWLoop = 0;
+ } else {
+ LOOP_i = Hexagon::J2_loop0i;
+ LOOP_r = Hexagon::J2_loop0r;
+ ENDLOOP = Hexagon::ENDLOOP0;
+ }
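Hexagon provides two hardware-loop register sets (SA0/LC0 and SA1/LC1), so the pass can now nest one level: the innermost converted loop takes the loop0 opcodes and an enclosing loop can still take loop1. A toy model of that claim order (illustrative only, not the pass's actual bookkeeping):

#include <cassert>

// With two register sets, at most two nesting levels convert.
// Returns 0 (loop0), 1 (loop1), or -1 (no hardware loop available).
static int claimLoopSet(bool &L0Used, bool &L1Used) {
  if (!L0Used) { L0Used = true; return 0; }
  if (!L1Used) { L1Used = true; return 1; }
  return -1;
}

int main() {
  bool L0 = false, L1 = false;
  assert(claimLoopSet(L0, L1) == 0);   // innermost loop -> loop0/endloop0
  assert(claimLoopSet(L0, L1) == 1);   // enclosing loop -> loop1/endloop1
  assert(claimLoopSet(L0, L1) == -1);  // a third level stays a normal branch
}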
+
#ifndef NDEBUG
// Stop trying after reaching the limit (if any).
int Limit = HWLoopLimit;
@@ -1008,14 +1132,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
#endif
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L))
+ if (containsInvalidInstruction(L, IsInnerHWLoop))
return false;
- // Is the induction variable bump feeding the latch condition?
- if (!fixupInductionVariable(L))
- return false;
-
- MachineBasicBlock *LastMBB = L->getExitingBlock();
+ MachineBasicBlock *LastMBB = getExitingBlock(L);
// Don't generate hw loop if the loop has more than one exit.
if (!LastMBB)
return false;
@@ -1024,16 +1144,19 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
if (LastI == LastMBB->end())
return false;
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
+ return false;
+
// Ensure the loop has a preheader: the loop instruction will be
// placed there.
- bool NewPreheader = false;
MachineBasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = createPreheaderForLoop(L);
if (!Preheader)
return false;
- NewPreheader = true;
}
+
MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
SmallVector<MachineInstr*, 2> OldInsts;
@@ -1048,31 +1171,30 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// so make sure that the register is actually defined at that point.
MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
MachineBasicBlock *BBDef = TCDef->getParent();
- if (!NewPreheader) {
- if (!MDT->dominates(BBDef, Preheader))
- return false;
- } else {
- // If we have just created a preheader, the dominator tree won't be
- // aware of it. Check if the definition of the register dominates
- // the header, but is not the header itself.
- if (!MDT->properlyDominates(BBDef, L->getHeader()))
- return false;
- }
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
}
// Determine the loop start.
- MachineBasicBlock *LoopStart = L->getTopBlock();
- if (L->getLoopLatch() != LastMBB) {
- // When the exit and latch are not the same, use the latch block as the
- // start.
- // The loop start address is used only after the 1st iteration, and the
- // loop latch may contains instrs. that need to be executed after the
- // first iteration.
- LoopStart = L->getLoopLatch();
- // Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart))
+ MachineBasicBlock *TopBlock = L->getTopBlock();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+  MachineBasicBlock *LoopStart = nullptr;
+  if (ExitingBlock != L->getLoopLatch()) {
+    MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ SmallVector<MachineOperand, 2> Cond;
+
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false))
+ return false;
+
+ if (L->contains(TB))
+ LoopStart = TB;
+ else if (L->contains(FB))
+ LoopStart = FB;
+ else
return false;
}
+ else
+ LoopStart = TopBlock;
// Convert the loop to a hardware loop.
DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
@@ -1086,8 +1208,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
- .addMBB(LoopStart)
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
.addReg(CountReg);
} else {
assert(TripCount->isImm() && "Expecting immediate value for trip count");
@@ -1095,14 +1216,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// if the immediate fits in the instructions. Otherwise, we need to
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
- if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
+ if (!TII->isValidOffset(LOOP_i, CountImm)) {
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
.addMBB(LoopStart).addReg(CountReg);
} else
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
.addMBB(LoopStart).addImm(CountImm);
}
@@ -1116,8 +1237,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// Replace the loop branch with an endloop instruction.
DebugLoc LastIDL = LastI->getDebugLoc();
- BuildMI(*LastMBB, LastI, LastIDL,
- TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+ BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
@@ -1145,10 +1265,18 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
removeIfDead(OldInsts[i]);
++NumHWLoops;
+
+  // Set RecL1used and RecL0used only after the hardware loop has been
+  // successfully generated. Doing it earlier can cause the wrong loop
+  // instruction to be used.
+ if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+ RecL1used = true;
+ else
+ RecL0used = true;
+
return true;
}
-
bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
MachineInstr *CmpI) {
assert (BumpI != CmpI && "Bump and compare in the same instruction?");
@@ -1189,35 +1317,226 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
return FoundBump;
}
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstr only once. If we attempt to visit more than once, there is
+/// recursion and we return false.
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+ MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const {
+ if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+ // Ignore all BBs that form Loop.
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ if (A == MBB)
+ return false;
+ }
+ MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+ LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+ return true;
+ } else
+ // Already visited node.
+ return false;
+}
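The visited-map idiom used here is worth seeing in isolation: a def chain through phis can be cyclic, and remembering each register on first visit guarantees termination. A minimal standalone analogue (plain ints standing in for vregs and their defining instructions):

#include <map>

// Follow definition chains; a node already in Visited means we looped back.
static bool walk(int Reg, const std::map<int, int> &DefOf,
                 std::map<int, bool> &Visited) {
  if (Visited.count(Reg))
    return false;                      // recursion detected: give up
  Visited[Reg] = true;
  auto It = DefOf.find(Reg);
  if (It == DefOf.end())
    return true;                       // reached a root definition
  return walk(It->second, DefOf, Visited);
}

int main() {
  std::map<int, int> DefOf = {{1, 2}, {2, 1}};  // %1 and %2 feed each other
  std::map<int, bool> Visited;
  return walk(1, DefOf, Visited) ? 1 : 0;       // terminates despite the cycle
}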
+
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+ MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+ MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+ assert(Phi->isPHI() && "Expecting a Phi.");
+ // Walk through each Phi, and its used operands. Make sure that
+ // if there is recursion in Phi, we won't generate hardware loops.
+ for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+ if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+ if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+ Phi->getParent(), L, LoopFeederPhi))
+ return true;
+ return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in
+/// the first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+ const MachineOperand *InitVal, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const {
+ // Only check register values since they are unknown.
+ if (!InitVal->isReg())
+ return false;
+
+ if (!EndVal->isImm())
+ return false;
+
+ // A register value that is assigned an immediate is a known value, and it
+ // won't underflow in the first iteration.
+ int64_t Imm;
+ if (checkForImmediate(*InitVal, Imm))
+ return (EndVal->getImm() == Imm);
+
+ unsigned Reg = InitVal->getReg();
+
+ // We don't know the value of a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return true;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def)
+ return true;
-MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+  // If the initial value is a Phi or copy and its operands cannot underflow,
+  // then the definition cannot underflow either.
+ if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+ if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
+ EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+
+ // Iterate over the uses of the initial value. If the initial value is used
+ // in a compare, then we assume this is a range check that ensures the loop
+ // doesn't underflow. This is not an exact test and should be improved.
+  for (MachineRegisterInfo::use_instr_nodbg_iterator
+         I = MRI->use_instr_nodbg_begin(Reg),
+         E = MRI->use_instr_nodbg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpMask = 0, CmpValue = 0;
+
+ if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
+ continue;
+
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 2> Cond;
+ if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
+ continue;
+
+    Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), nullptr,
+                                             nullptr, 0);
+ if (Cmp == 0)
+ continue;
+ if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
+ Cmp = Comparison::getNegatedComparison(Cmp);
+ if (CmpReg2 != 0 && CmpReg2 == Reg)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ // Signed underflow is undefined.
+ if (Comparison::isSigned(Cmp))
+ return false;
+
+    // Check if there is a comparison of the initial value. If the initial
+    // value is greater than or not equal to another value, then assume this
+    // is a range check.
+ if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
+ return false;
+ }
+
+  // OK - this is a hack that needs to be improved. We really need to analyze
+  // the instructions performed on the initial value. This works only in the
+  // simplest cases.
+ if (!Def->isCopy() && !Def->isPHI())
+ return false;
+
+ return true;
+}
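The hazard screened for above is the classic unsigned do-while wrap: a zero initial value still executes the body once, and the first decrement wraps. A two-line demonstration:

#include <cassert>
#include <cstdint>

int main() {
  // A do-while counter that starts at 0: the body runs, the first decrement
  // wraps 0 -> 0xFFFFFFFF, and the trip count the hardware loop would need
  // is 2^32, not 0. The pass must prove the initial value is nonzero (e.g.
  // via a preceding range check) or bail out.
  uint32_t n = 0;
  uint32_t afterFirstDecrement = n - 1;
  assert(afterFirstDecrement == UINT32_MAX);
}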
+
+bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
+ int64_t &Val) const {
+ if (MO.isImm()) {
+ Val = MO.getImm();
+ return true;
+ }
+ if (!MO.isReg())
+ return false;
+
+  // MO is a register. Check whether it is defined as an immediate value,
+  // and if so, record that value in TV. It will then need to be processed
+  // to handle a potential subregister in MO.
+ int64_t TV;
+
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return false;
MachineInstr *DI = MRI->getVRegDef(R);
unsigned DOpc = DI->getOpcode();
switch (DOpc) {
+ case TargetOpcode::COPY:
case Hexagon::A2_tfrsi:
case Hexagon::A2_tfrpi:
case Hexagon::CONST32_Int_Real:
- case Hexagon::CONST64_Int_Real:
- return DI;
- }
- return nullptr;
-}
+ case Hexagon::CONST64_Int_Real: {
+ // Call recursively to avoid an extra check whether operand(1) is
+ // indeed an immediate (it could be a global address, for example),
+ // plus we can handle COPY at the same time.
+ if (!checkForImmediate(DI->getOperand(1), TV))
+ return false;
+ break;
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S2 = DI->getOperand(2);
+ int64_t V1, V2;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
+ return false;
+ TV = V2 | (V1 << 32);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S3 = DI->getOperand(3);
+ int64_t V1, V3;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
+ return false;
+ unsigned Sub2 = DI->getOperand(2).getImm();
+ unsigned Sub4 = DI->getOperand(4).getImm();
+ if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
+ TV = V1 | (V3 << 32);
+ else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
+ TV = V3 | (V1 << 32);
+ else
+ llvm_unreachable("Unexpected form of REG_SEQUENCE");
+ break;
+ }
+ default:
+ return false;
+ }
-int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isReg());
- unsigned R = MO.getReg();
- MachineInstr *DI = defWithImmediate(R);
- assert(DI && "Need an immediate operand");
- // All currently supported "define-with-immediate" instructions have the
- // actual immediate value in the operand(1).
- int64_t v = DI->getOperand(1).getImm();
- return v;
+  // By now, we should have successfully obtained the immediate value defining
+ // the register referenced in MO. Handle a potential use of a subregister.
+ switch (MO.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Val = TV & 0xFFFFFFFFULL;
+ break;
+ case Hexagon::subreg_hireg:
+ Val = (TV >> 32) & 0xFFFFFFFFULL;
+ break;
+ default:
+ Val = TV;
+ break;
+ }
+ return true;
}
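The tail of checkForImmediate composes a 64-bit value from two 32-bit halves and then re-splits it according to the subregister. A standalone check of that bit arithmetic (plain integers; the subregister names are replaced by Lo/Hi):

#include <cassert>
#include <cstdint>

int main() {
  // A 64-bit value assembled as in the A2_combinew case: high word V1,
  // low word V2.
  int64_t V1 = 0x1234, V2 = 0x5678;
  int64_t TV = V2 | (V1 << 32);
  // A subregister read then picks one half, as in the switch on getSubReg().
  int64_t Lo = TV & 0xFFFFFFFFULL;
  int64_t Hi = (TV >> 32) & 0xFFFFFFFFULL;
  assert(Lo == V2 && Hi == V1);
}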
-
void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
if (MO.isImm()) {
MO.setImm(Val);
@@ -1226,30 +1545,32 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
assert(MO.isReg());
unsigned R = MO.getReg();
- MachineInstr *DI = defWithImmediate(R);
- if (MRI->hasOneNonDBGUse(R)) {
- // If R has only one use, then just change its defining instruction to
- // the new immediate value.
- DI->getOperand(1).setImm(Val);
- return;
- }
+ MachineInstr *DI = MRI->getVRegDef(R);
const TargetRegisterClass *RC = MRI->getRegClass(R);
unsigned NewR = MRI->createVirtualRegister(RC);
MachineBasicBlock &B = *DI->getParent();
DebugLoc DL = DI->getDebugLoc();
- BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
- .addImm(Val);
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
MO.setReg(NewR);
}
+static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
+ // These two instructions are not extendable.
+ if (CmpOpc == Hexagon::A4_cmpbeqi)
+ return isUInt<8>(Imm);
+ if (CmpOpc == Hexagon::A4_cmpbgti)
+ return isInt<8>(Imm);
+ // The rest of the comparison-with-immediate instructions are extendable.
+ return true;
+}
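For reference, the two non-extendable ranges used above, with stand-ins for LLVM's isUInt<8>/isInt<8> templates:

#include <cassert>

static bool fitsU8(long long v) { return v >= 0 && v <= 255; }
static bool fitsS8(long long v) { return v >= -128 && v <= 127; }

int main() {
  assert(fitsU8(255) && !fitsU8(256));    // A4_cmpbeqi takes an unsigned u8
  assert(fitsS8(-128) && !fitsS8(128));   // A4_cmpbgti takes a signed s8
}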
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *Header = L->getHeader();
- MachineBasicBlock *Preheader = L->getLoopPreheader();
MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
- if (!Header || !Preheader || !Latch)
+ if (!(Header && Latch && ExitingBlock))
return false;
// These data structures follow the same concept as the corresponding
@@ -1277,15 +1598,16 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
unsigned PhiReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiReg);
unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
if (isAdd) {
// If the register operand to the add/sub is the PHI we are looking
// at, this meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
- if (MRI->getVRegDef(IndReg) == Phi) {
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
unsigned UpdReg = DI->getOperand(0).getReg();
- int64_t V = DI->getOperand(2).getImm();
IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
@@ -1298,17 +1620,38 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *TB = nullptr, *FB = nullptr;
SmallVector<MachineOperand,2> Cond;
// AnalyzeBranch returns true if it fails to analyze branch.
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
- if (NotAnalyzed)
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+ if (NotAnalyzed || Cond.empty())
return false;
- // Check if the latch branch is unconditional.
- if (Cond.empty())
- return false;
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+    MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return false;
- if (TB != Header && FB != Header)
- // The latch does not go back to the header. Not a latch we know and love.
- return false;
+ // Since latch is not the exiting block, the latch branch should be an
+ // unconditional branch to the loop header.
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB : LFB;
+ }
+ if (TB != Header) {
+ if (FB != Header) {
+ // The latch/exit block does not go back to the header.
+ return false;
+ }
+    // FB is the header (i.e., an unconditional jump back to the loop header).
+    // In this case, the edge LoopBody -> TB must not be a back edge; otherwise
+    // it could result in an infinite loop after conversion to a hw_loop.
+ // This case can happen when the Latch has two jumps like this:
+ // Jmp_c OuterLoopHeader <-- TB
+ // Jmp InnerLoopHeader <-- FB
+ if (MDT->dominates(TB, FB))
+ return false;
+ }
// Expecting a predicate register as a condition. It won't be a hardware
// predicate register at this point yet, just a vreg.
@@ -1319,6 +1662,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (CSz != 1 && CSz != 2)
return false;
+ if (!Cond[CSz-1].isReg())
+ return false;
+
unsigned P = Cond[CSz-1].getReg();
MachineInstr *PredDef = MRI->getVRegDef(P);
@@ -1340,8 +1686,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (MO.isImplicit())
continue;
if (MO.isUse()) {
- unsigned R = MO.getReg();
- if (!defWithImmediate(R)) {
+ if (!isImmediate(MO)) {
CmpRegs.insert(MO.getReg());
continue;
}
@@ -1374,20 +1719,70 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// compared against an immediate, we can fix it.
const RegisterBump &RB = I->second;
if (CmpRegs.count(RB.first)) {
- if (!CmpImmOp)
+ if (!CmpImmOp) {
+        // If both operands to the compare instruction are registers, see if
+        // it can be changed to use the induction register as one of the
+        // operands.
+ MachineInstr *IndI = nullptr;
+ MachineInstr *nonIndI = nullptr;
+ MachineOperand *IndMO = nullptr;
+ MachineOperand *nonIndMO = nullptr;
+
+ for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(I->first)));
+ if (IndI)
+ return false;
+
+ IndI = MRI->getVRegDef(I->first);
+ IndMO = &MO;
+ } else if (MO.isReg()) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(MO.getReg())));
+ if (nonIndI)
+ return false;
+
+ nonIndI = MRI->getVRegDef(MO.getReg());
+ nonIndMO = &MO;
+ }
+ }
+ if (IndI && nonIndI &&
+ nonIndI->getOpcode() == Hexagon::A2_addi &&
+ nonIndI->getOperand(2).isImm() &&
+            nonIndI->getOperand(2).getImm() == -RB.second) {
+ bool Order = orderBumpCompare(IndI, PredDef);
+ if (Order) {
+ IndMO->setReg(I->first);
+ nonIndMO->setReg(nonIndI->getOperand(1).getReg());
+ return true;
+ }
+ }
return false;
+ }
+ // It is not valid to do this transformation on an unsigned comparison
+ // because it may underflow.
+      Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), nullptr,
+                                               nullptr, 0);
+ if (!Cmp || Comparison::isUnsigned(Cmp))
+ return false;
+
+      // If the register is being compared against an immediate, try changing
+      // the compare instruction to use the induction register and adjust the
+      // immediate operand.
int64_t CmpImm = getImmediate(*CmpImmOp);
int64_t V = RB.second;
- if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
- return false;
- if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+      // Bail out on 64-bit overflow. The check avoids computing the
+      // overflowing sum itself, since signed overflow is undefined.
+ if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
+ ((V < 0) && (CmpImm < INT64_MIN - V)))
return false;
CmpImm += V;
- // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
- // scenario, i.e. an 8-bit value.
- if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
- return false;
+ // Most comparisons of register against an immediate value allow
+ // the immediate to be constant-extended. There are some exceptions
+ // though. Make sure the new combination will work.
+ if (CmpImmOp->isImm())
+ if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
+ return false;
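The rewritten guard is notable because the old form computed CmpImm + V first and then inspected it, which is undefined behavior on signed overflow; the new form rejects the case without ever evaluating the sum. A standalone version of the predicate:

#include <cassert>
#include <cstdint>

// True iff a + b would overflow int64_t, decided without performing the
// (undefined) overflowing addition itself.
static bool addOverflows(int64_t a, int64_t b) {
  return (b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b);
}

int main() {
  assert(addOverflows(INT64_MAX, 1));
  assert(!addOverflows(INT64_MAX - 1, 1));
  assert(addOverflows(INT64_MIN, -1));
}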
// Make sure that the compare happens after the bump. Otherwise,
// after the fixup, the compare would use a yet-undefined register.
@@ -1411,19 +1806,27 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
return false;
}
-
/// \brief Create a preheader for a given loop.
MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
MachineLoop *L) {
if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
return TmpPH;
+ if (!HWCreatePreheader)
+ return nullptr;
+
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
MachineFunction *MF = Header->getParent();
DebugLoc DL;
- if (!Latch || Header->hasAddressTaken())
+#ifndef NDEBUG
+ if ((PHFn != "") && (PHFn != MF->getName()))
+ return nullptr;
+#endif
+
+ if (!Latch || !ExitingBlock || Header->hasAddressTaken())
return nullptr;
typedef MachineBasicBlock::instr_iterator instr_iterator;
@@ -1435,16 +1838,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
SmallVector<MachineOperand,2> Tmp1;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
return nullptr;
for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
MachineBasicBlock *PB = *I;
- if (PB != Latch) {
- bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
- if (NotAnalyzed)
- return nullptr;
- }
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return nullptr;
}
MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
@@ -1453,7 +1854,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
// the loop latch. Any additional predecessors of the header should
- // join at the newly created preheader. Inspect all PHI nodes from the
+ // join at the newly created preheader. Inspect all PHI nodes from the
// header and create appropriate corresponding PHI nodes in the preheader.
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
@@ -1473,11 +1874,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
// created PHI node in the preheader.
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
unsigned PredR = PN->getOperand(i).getReg();
+ unsigned PredRSub = PN->getOperand(i).getSubReg();
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
if (PredB == Latch)
continue;
- NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ MachineOperand MO = MachineOperand::CreateReg(PredR, false);
+ MO.setSubReg(PredRSub);
+ NewPN->addOperand(MO);
NewPN->addOperand(MachineOperand::CreateMBB(PredB));
}
@@ -1547,5 +1951,16 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
NewPH->addSuccessor(Header);
+ MachineLoop *ParentLoop = L->getParentLoop();
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
+
+ // Update the dominator information with the new preheader.
+ if (MDT) {
+ MachineDomTreeNode *HDom = MDT->getNode(Header);
+ MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
+ MDT->changeImmediateDominator(Header, NewPH);
+ }
+
return NewPH;
}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index ea3a1770ac31..7a213aad072c 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -13,8 +13,11 @@
#include "Hexagon.h"
#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
@@ -45,51 +48,43 @@ namespace llvm {
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const HexagonSubtarget &Subtarget;
-
- // Keep a reference to HexagonTargetMachine.
- const HexagonTargetMachine& TM;
- DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+ const HexagonTargetMachine& HTM;
+ const HexagonSubtarget *HST;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(targetmachine, OptLevel),
- Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
- TM(targetmachine) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- bool hasNumUsesBelowThresGA(SDNode *N) const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+  void PreprocessISelDAG() override;
+  void EmitFunctionEntryCode() override;
SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
- inline bool foldGlobalAddress(SDValue &N, SDValue &R);
- inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
- bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
- bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+ inline bool SelectAddrGA(SDValue &N, SDValue &R);
+ inline bool SelectAddrGP(SDValue &N, SDValue &R);
+ bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+ bool SelectAddrFI(SDValue &N, SDValue &R);
const char *getPassName() const override {
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
+ SDNode *SelectFrameIndex(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
-
SDNode *SelectLoad(SDNode *N);
SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -101,88 +96,98 @@ public:
SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
SDNode *SelectStore(SDNode *N);
SDNode *SelectSHL(SDNode *N);
- SDNode *SelectSelect(SDNode *N);
- SDNode *SelectTruncate(SDNode *N);
SDNode *SelectMul(SDNode *N);
SDNode *SelectZeroExtend(SDNode *N);
- SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
-
-// XformMskToBitPosU5Imm - Returns the bit position which
-// the single bit 32 bit mask represents.
-// Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
- int32_t bitPos;
- bitPos = Log2_32(Imm);
- assert(bitPos >= 0 && bitPos < 32 &&
- "Constant out of range for 32 BitPos Memops");
- return CurDAG->getTargetConstant(bitPos, MVT::i32);
-}
+ SDNode *SelectBitOp(SDNode *N);
+
+  // XformMskToBitPosU5Imm - Returns the bit position that the single-bit
+  // 32-bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, DL, MVT::i32);
+ }
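+
+  // Worked example (added for exposition): for the single-bit mask 0x10,
+  // Log2_32 recovers bit position 4, so the memop is emitted as, e.g.,
+  // memw(Rs+#0) = clrbit(#4). Likewise 0x1 -> #0 and 0x80000000 -> #31.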
-// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+  // XformMskToBitPosU4Imm - Returns the bit position that the single-bit
+  // 16-bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
-// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+  // XformMskToBitPosU3Imm - Returns the bit position that the single-bit
+  // 8-bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
-// Return true if there is exactly one bit set in V, i.e., if V is one of the
-// following integers: 2^0, 2^1, ..., 2^31.
-bool ImmIsSingleBit(uint32_t v) const {
- uint32_t c = CountPopulation_64(v);
- // Only return true if we counted 1 bit.
- return c == 1;
-}
+ // Return true if there is exactly one bit set in V, i.e., if V is one of the
+ // following integers: 2^0, 2^1, ..., 2^31.
+ bool ImmIsSingleBit(uint32_t v) const {
+ return isPowerOf2_32(v);
+ }
-// XformM5ToU5Imm - Return a target constant with the specified value, of type
-// i32 where the negative literal is transformed into a positive literal for
-// use in -= memops.
-inline SDValue XformM5ToU5Imm(signed Imm) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, MVT::i32);
-}
+  // XformM5ToU5Imm - Return a target constant with the specified value, of
+  // type i32, where the negative literal is transformed into a positive one
+  // for use in -= memops.
+  inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
+    assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+    return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
+  }
+ // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+ // [1..128], used in cmpb.gtu instructions.
+ inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8);
+ }
-// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
-// [1..128], used in cmpb.gtu instructions.
-inline SDValue XformU7ToU7M1Imm(signed Imm) {
- assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
-}
+  // XformSToSM1Imm - Return a target constant decremented by 1.
+ inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
-// XformS8ToS8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformSToSM1Imm(signed Imm) {
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+  // XformUToUM1Imm - Return a target constant decremented by 1.
+ inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
-// XformU8ToU8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformUToUM1Imm(unsigned Imm) {
- assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformSToSM2Imm - Return a target constant decremented by 2.
+ inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32);
+ }
+
+ // XformSToSM3Imm - Return a target constant decremented by 3.
+ inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32);
+ }
-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
-};
+ // Include the pieces autogenerated from the target description.
+ #include "HexagonGenDAGISel.inc"
+
+private:
+ bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
} // end anonymous namespace
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
+}
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -196,76 +201,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
}
-static bool IsS11_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}
-
-
-static bool IsS11_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}
-
-
-static bool IsS11_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}
-
-
-static bool IsS11_3_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}
-
-
-static bool IsU6_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}
-
-
-static bool IsU6_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}
-
-
-static bool IsU6_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}
-
-
 // Intrinsics that return a predicate.
static unsigned doesIntrinsicReturnPredicate(unsigned ID)
{
@@ -312,268 +247,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
}
}
-
-// Intrinsics that have predicate operands.
-static unsigned doesIntrinsicContainPredicate(unsigned ID)
-{
- switch (ID) {
- default:
- return 0;
- case Intrinsic::hexagon_C2_tfrpr:
- return Hexagon::C2_tfrpr;
- case Intrinsic::hexagon_C2_and:
- return Hexagon::C2_and;
- case Intrinsic::hexagon_C2_xor:
- return Hexagon::C2_xor;
- case Intrinsic::hexagon_C2_or:
- return Hexagon::C2_or;
- case Intrinsic::hexagon_C2_not:
- return Hexagon::C2_not;
- case Intrinsic::hexagon_C2_any8:
- return Hexagon::C2_any8;
- case Intrinsic::hexagon_C2_all8:
- return Hexagon::C2_all8;
- case Intrinsic::hexagon_C2_vitpack:
- return Hexagon::C2_vitpack;
- case Intrinsic::hexagon_C2_mask:
- return Hexagon::C2_mask;
- case Intrinsic::hexagon_C2_mux:
- return Hexagon::C2_mux;
-
- // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
- // that's how it's mapped in q6protos.h.
- case Intrinsic::hexagon_C2_muxir:
- return Hexagon::C2_muxri;
-
- // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
- // that's how it's mapped in q6protos.h.
- case Intrinsic::hexagon_C2_muxri:
- return Hexagon::C2_muxir;
-
- case Intrinsic::hexagon_C2_muxii:
- return Hexagon::C2_muxii;
- case Intrinsic::hexagon_C2_vmux:
- return Hexagon::VMUX_prr64;
- case Intrinsic::hexagon_S2_valignrb:
- return Hexagon::VALIGN_rrp;
- case Intrinsic::hexagon_S2_vsplicerb:
- return Hexagon::VSPLICE_rrp;
- }
-}
-
-
-static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
- if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
- return true;
- }
- if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
- return true;
- }
- if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
- return true;
- }
- if (MemType == MVT::i8 && isInt<11>(Offset)) {
- return true;
- }
- return false;
-}
-
-
-//
-// Try to lower loads of GlobalAdresses into base+offset loads. Custom
-// lowering for GlobalAddress nodes has already turned it into a
-// CONST32.
-//
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
- SDValue Chain = LD->getChain();
- SDNode* Const32 = LD->getBasePtr().getNode();
- unsigned Opcode = 0;
-
- if (Const32->getOpcode() == HexagonISD::CONST32 &&
- ISD::isNormalLoad(LD)) {
- SDValue Base = Const32->getOperand(0);
- EVT LoadedVT = LD->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
- // Figure out base + offset opcode
- if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
- else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
- else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
- else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
- else llvm_unreachable("unknown memory type");
-
- // Build indexed load.
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- MVT::Other,
- SDValue(NewBase,0),
- TargetConstOff,
- Chain);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(LD, Result);
- return Result;
- }
- }
-
- return SelectCode(LD);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
- MVT::Other, Base, TargetConst,
- Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
- SDValue(Result_1, 0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_2;
- }
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other, Base, TargetConst0,
+
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl,
- MVT::i64, SDValue(Result_1, 0));
- SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
- MVT::i32, Base, TargetConstVal,
- SDValue(Result_1, 1));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
+ SDValue(LD, 2) };
const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_3, 0),
- SDValue(Result_1, 1)
- };
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
return Result_2;
}
- return SelectCode(LD);
+
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
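+
+// Added for exposition, not part of the original change: the auto-increment
+// immediates accepted by isValidAutoIncImm are, in essence, signed 4-bit
+// values scaled by the access size (s4:0..s4:3 in the Hexagon manual). A
+// minimal sketch of that check, under that assumption:
+LLVM_ATTRIBUTE_UNUSED
+static bool isValidAutoIncImmSketch(unsigned AccessSize, int Offset) {
+  if (Offset % int(AccessSize) != 0)   // must be a multiple of the size
+    return false;
+  int Scaled = Offset / int(AccessSize);
+  return Scaled >= -8 && Scaled <= 7;  // must fit in a signed 4-bit field
+}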
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::i32, MVT::Other, Base,
- TargetConstVal, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
- MVT::i64, MVT::Other,
- SDValue(Result_2,0),
- SDValue(Result_1,0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_3;
- }
- // Generate an indirect load.
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other,
- Base, TargetConst0, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
MVT::i64, MVT::Other,
- SDValue(Result_2,0),
+ TargetConst0,
SDValue(Result_1,0));
- // Add offset to base.
- SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
- Base, TargetConstVal,
- SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
- SDValue(Result_4, 0), // New address.
- SDValue(Result_1, 1)
- };
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
- return Result_3;
+ return Result_2;
}
- return SelectCode(LD);
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0), // Load value.
+ SDValue(Result_3, 0), // New address.
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
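+
+// Note (added for exposition): A4_combineir builds a register pair from an
+// immediate high word and a register low word, so combine(#0, Rx) produces
+// the zero-extended 64-bit value directly, without the separate A2_tfrsi of
+// #0 that the deleted sequence needed.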
@@ -587,47 +373,45 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
EVT LoadedVT = LD->getMemoryVT();
unsigned Opcode = 0;
- // Check for zero ext loads.
- bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+ // Check for zero extended loads. Treat any-extend loads as zero extended
+ // loads.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
- Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
- Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
} else
llvm_unreachable("unknown memory type");
- // For zero ext i64 loads, we need to add combine instructions.
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::ZEXTLOAD) {
+ // For zero extended i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt)
return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
- }
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::SEXTLOAD) {
- // Handle sign ext i64 loads.
+ // Handle sign extended i64 loads.
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- }
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
MVT::i32, MVT::Other, Base,
@@ -646,13 +430,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
ReplaceUses(Froms, Tos, 3);
return Result;
} else {
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
MVT::Other, Base, TargetConst0,
Chain);
- SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, TargetConstVal,
SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -682,7 +466,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
if (AM != ISD::UNINDEXED) {
result = SelectIndexedLoad(LD, dl);
} else {
- result = SelectBaseOffsetLoad(LD, dl);
+ result = SelectCode(LD);
}
return result;
@@ -698,14 +482,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Get the constant value.
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
EVT StoredVT = ST->getMemoryVT();
+ EVT ValueVT = Value.getValueType();
 // Offset value must be within the representable range
 // and must have correct alignment properties.
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
- Chain};
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
 // Figure out the post-inc version of the opcode.
@@ -715,6 +497,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
else llvm_unreachable("unknown memory type");
+ if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+ assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ dl, MVT::i32, Value);
+ }
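+    // Added for exposition: a truncating store of a 64-bit value writes
+    // only the low word, hence the subreg_loreg extraction above.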
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value,
+ Chain};
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other, Ops);
@@ -728,9 +517,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
}
// Note: Order of operands matches the def of instruction:
- // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // def S2_storerd_io
+ // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
// and it differs for POST_ST* for instance.
- SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value,
Chain};
unsigned Opcode = 0;
@@ -742,10 +532,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else llvm_unreachable("unknown memory type");
// Build regular store.
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
 // Build the separate increment instruction.
- SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base,
TargetConstVal,
SDValue(Result_1, 0));
@@ -758,61 +548,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
return Result_2;
}
-
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
- SDLoc dl) {
- SDValue Chain = ST->getChain();
- SDNode* Const32 = ST->getBasePtr().getNode();
- SDValue Value = ST->getValue();
- unsigned Opcode = 0;
-
- // Try to lower stores of GlobalAdresses into indexed stores. Custom
- // lowering for GlobalAddress nodes has already turned it into a
- // CONST32. Avoid truncating stores for the moment. Post-inc stores
- // do the same. Don't think there's a reason for it, so will file a
- // bug to fix.
- if ((Const32->getOpcode() == HexagonISD::CONST32) &&
- !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
- SDValue Base = Const32->getOperand(0);
- if (Base.getOpcode() == ISD::TargetGlobalAddress) {
- EVT StoredVT = ST->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
-
- // Figure out base + offset opcode
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
- else llvm_unreachable("unknown memory type");
-
- SDValue Ops[] = {SDValue(NewBase,0),
- CurDAG->getTargetConstant(Offset,PointerTy),
- Value, Chain};
- // build indexed store
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(ST, Result);
- return Result;
- }
- }
- }
-
- return SelectCode(ST);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -823,7 +558,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
return SelectIndexedStore(ST, dl);
}
- return SelectBaseOffsetStore(ST, dl);
+ return SelectCode(ST);
}
SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
@@ -864,7 +599,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
@@ -890,7 +625,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
@@ -909,187 +644,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
return SelectCode(N);
}
-
-SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
- SDLoc dl(N);
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::SETCC) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- SDValue N000 = N00.getOperand(0);
- SDValue N001 = N00.getOperand(1);
- if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
- SDValue N01 = N0.getOperand(1);
- SDValue N02 = N0.getOperand(2);
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
- }
- }
- }
-
- return SelectCode(N);
-}
-
-
-SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
- SDLoc dl(N);
- SDValue Shift = N->getOperand(0);
-
- //
- // %conv.i = sext i32 %tmp1 to i64
- // %conv2.i = sext i32 %add to i64
- // %mul.i = mul nsw i64 %conv2.i, %conv.i
- // %shr5.i = lshr i64 %mul.i, 32
- // %conv3.i = trunc i64 %shr5.i to i32
- //
- // --- match with the following ---
- //
- // %conv3.i = mpy (%tmp1, %add)
- //
- // Trunc to i32.
- if (N->getValueType(0) == MVT::i32) {
- // Trunc from i64.
- if (Shift.getNode()->getValueType(0) == MVT::i64) {
- // Trunc child is logical shift right.
- if (Shift.getOpcode() != ISD::SRL) {
- return SelectCode(N);
- }
-
- SDValue ShiftOp0 = Shift.getOperand(0);
- SDValue ShiftOp1 = Shift.getOperand(1);
-
- // Shift by const 32
- if (ShiftOp1.getOpcode() != ISD::Constant) {
- return SelectCode(N);
- }
-
- int32_t ShiftConst =
- cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
- if (ShiftConst != 32) {
- return SelectCode(N);
- }
-
- // Shifting a i64 signed multiply
- SDValue Mul = ShiftOp0;
- if (Mul.getOpcode() != ISD::MUL) {
- return SelectCode(N);
- }
-
- SDValue MulOp0 = Mul.getOperand(0);
- SDValue MulOp1 = Mul.getOperand(1);
-
- SDValue OP0;
- SDValue OP1;
-
- // Handle sign_extend and sextload
- if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext0 = MulOp0.getOperand(0);
- if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- return SelectCode(N);
- }
-
- OP0 = Sext0;
- } else if (MulOp0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Same goes for the second operand.
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext1 = MulOp1.getOperand(0);
- if (Sext1.getNode()->getValueType(0) != MVT::i32)
- return SelectCode(N);
-
- OP1 = Sext1;
- } else if (MulOp1.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
- OP0, OP1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
-
- return SelectCode(N);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i32) {
@@ -1107,7 +661,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t MulConst =
cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
int32_t ValConst = MulConst << ShlConst;
- SDValue Val = CurDAG->getTargetConstant(ValConst,
+ SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
MVT::i32);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1135,7 +689,8 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t Shl2Const =
cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
int32_t ValConst = 1 << (ShlConst+Shl2Const);
- SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+ MVT::i32);
if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1168,6 +723,37 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
//
SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
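+    // Worked example (added for exposition): zero-extending v4i1 to v4i8
+    // gives NE = 4 and ES = 8, so MV = 0x01010101. C2_mask expands each
+    // predicate bit to a 0xff byte, and the AND below then leaves 0 or 1
+    // in each lane, which is exactly the zero-extended result.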
+ SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
+
SDNode *IsIntrinsic = N->getOperand(0).getNode();
if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
unsigned ID =
@@ -1175,8 +761,8 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
SDValue(IsIntrinsic, 0));
@@ -1204,56 +790,227 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
return SelectCode(N);
}
+//
+// Checking for circular load/store and bitreverse load/store intrinsics
+// in order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ // Only the *_ld instructions push the extra return type, and bump the
+ // result node operand number correspondingly.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
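+
+    // Note (added for exposition): the loaded value is returned through the
+    // intrinsic's pointer operand, so the pseudo's value result is written
+    // back to memory by the store built above.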
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
//
 // Checking for splat intrinsics whose operand is already an extension,
 // so the redundant extension can be stripped before selection.
//
SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
- SDLoc dl(N);
- unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
-
- // We are concerned with only those intrinsics that have predicate registers
- // as at least one of the operands.
- if (IntrinsicWithPred) {
- SmallVector<SDValue, 8> Ops;
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
- // Iterate over all the operands of the intrinsics.
- // For PredRegs, do the transfer.
- // For Double/Int Regs, just preserve the value
- // For immediates, lower it.
- for (unsigned i = 1; i < N->getNumOperands(); ++i) {
- SDNode *Arg = N->getOperand(i).getNode();
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF);
-
- if (RC == &Hexagon::IntRegsRegClass ||
- RC == &Hexagon::DoubleRegsRegClass) {
- Ops.push_back(SDValue(Arg, 0));
- } else if (RC == &Hexagon::PredRegsRegClass) {
- // Do the transfer.
- SDNode *PdRs = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(Arg, 0));
- Ops.push_back(SDValue(PdRs,0));
- } else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) {
- // This is immediate operand. Lower it here making sure that we DO have
- // const SDNode for immediate value.
- int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
- SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
- Ops.push_back(SDVal);
- } else {
- llvm_unreachable("Unimplemented");
- }
- }
- EVT ReturnValueVT = N->getValueType(0);
- SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
- ReturnValueVT, Ops);
- ReplaceUses(N, Result);
- return Result;
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Bits;
+ switch (IID) {
+ case Intrinsic::hexagon_S2_vsplatrb:
+ Bits = 8;
+ break;
+ case Intrinsic::hexagon_S2_vsplatrh:
+ Bits = 16;
+ break;
+ default:
+ return SelectCode(N);
+ }
+
+ SDValue const &V = N->getOperand(1);
+ SDValue U;
+ if (isValueExtension(V, Bits, U)) {
+ SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), U);
+ return SelectCode(R.getNode());
}
return SelectCode(N);
}
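+
+// Note (added for exposition): S2_vsplatrb/S2_vsplatrh read only the low
+// 8/16 bits of their operand, so an extension feeding the splat (detected
+// by isValueExtension) is redundant; the code above reissues the intrinsic
+// on the unextended value and selects that instead.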
@@ -1267,47 +1024,30 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
APFloat APF = CN->getValueAPF();
if (N->getValueType(0) == MVT::f32) {
return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
- CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
}
else if (N->getValueType(0) == MVT::f64) {
return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
- CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
}
return SelectCode(N);
}
-
//
// Map predicate true (encoded as -1 in LLVM) to a XOR.
//
SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result;
+ SDNode* Result = 0;
int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (Val == -1) {
- // Create the IntReg = 1 node.
- SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32));
-
- // Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(IntRegTFR, 0));
-
- // not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
- SDValue(Pd, 0));
-
- // xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
- SDValue(Pd, 0), SDValue(NotPd, 0));
-
- // We have just built:
- // Rs = Pd
- // Pd = xor(not(Pd), Pd)
-
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
ReplaceUses(N, Result);
return Result;
}
@@ -1343,6 +1083,175 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
return Result;
}
+//
+// Map the following to the Hexagon bit-manipulation instructions, where
+// possible:
+//   AND/FABS -> clrbit
+//   OR       -> setbit
+//   XOR/FNEG -> togglebit
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+
+  // We handle only 32- and 64-bit bitwise operations.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
+
+  // We handle fabs and fneg only on V5 and later.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
+
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
+ }
+
+ if (Opc == ISD::AND) {
+ if (((ValueVT == MVT::i32) &&
+ (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
+ ((ValueVT == MVT::i64) &&
+ (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
+ // If it's simple AND, do the normal op.
+ return SelectCode(N);
+ else
+ Val = ~Val;
+ }
+
+  // If the OR or AND is fed by a shl, srl, or sra, don't do this rewrite,
+  // because Hexagon provides |= and &= combined with shl, srl, and sra.
+  // Check the first operand for such a shift.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default: break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
+ }
+
+ // Make sure it's power of 2.
+ unsigned bitpos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
+ ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ return SelectCode(N);
+
+ // Get the bit position.
+ bitpos = countTrailingZeros(uint64_t(Val));
+ } else {
+ // For fabs and fneg, it's always the 31st bit.
+ bitpos = 31;
+ }
+
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch(Opc) {
+ default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
+
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+    // 64-bit needs some REG_SEQUENCE gymnastics, but it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ dl, MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && bitpos < 32) {
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregLO, SDVal);
+ const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
+ SDValue(Result0, 0), SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ } else {
+ if (Opc != ISD::FABS && Opc != ISD::FNEG)
+ SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregHI, SDVal);
+ const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
+ SubregLO, SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ }
+ }
+
+ ReplaceUses(N, Result);
+ return Result;
+}
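+
+// Illustrative examples (added for exposition) of the mapping above:
+//   x & 0xffffffef  ->  r = clrbit(r, #4)     (AND with an inverted bit)
+//   x | 0x00000020  ->  r = setbit(r, #5)
+//   x ^ 0x80000000  ->  r = togglebit(r, #31) (the FNEG-on-f32 pattern)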
+
+
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ const HexagonFrameLowering *HFI = HST->getFrameLowering();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ unsigned StkA = HFI->getStackAlignment();
+ unsigned MaxA = MFI->getMaxAlignment();
+ SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ SDLoc DL(N);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SDNode *R = 0;
+
+ // Use TFR_FI when:
+ // - the object is fixed, or
+ // - there are no objects with higher-than-default alignment, or
+ // - there are no dynamically allocated objects.
+ // Otherwise, use TFR_FIA.
+ if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) {
+ R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
+ } else {
+ auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
+ unsigned AR = HMFI.getStackAlignBaseVReg();
+ SDValue CH = CurDAG->getEntryNode();
+ SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
+ R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops);
+ }
+
+ if (N->getHasDebugValue())
+ CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0));
+ return R;
+}
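+
+// Note (added for exposition, details assumed): TFR_FI and TFR_FIA are
+// pseudos resolved during frame-index elimination into an add of the frame
+// base (or, for TFR_FIA, the aligned-base register AR) and the object
+// offset; see HexagonFrameLowering/HexagonRegisterInfo for the expansion.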
+
SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
@@ -1350,7 +1259,6 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
-
switch (N->getOpcode()) {
case ISD::Constant:
return SelectConstant(N);
@@ -1358,6 +1266,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::ConstantFP:
return SelectConstantFP(N);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(N);
+
case ISD::ADD:
return SelectAdd(N);
@@ -1370,18 +1281,22 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
return SelectStore(N);
- case ISD::SELECT:
- return SelectSelect(N);
-
- case ISD::TRUNCATE:
- return SelectTruncate(N);
-
case ISD::MUL:
return SelectMul(N);
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FABS:
+ case ISD::FNEG:
+ return SelectBitOp(N);
+
case ISD::ZERO_EXTEND:
return SelectZeroExtend(N);
+ case ISD::INTRINSIC_W_CHAIN:
+ return SelectIntrinsicWChain(N);
+
case ISD::INTRINSIC_WO_CHAIN:
return SelectIntrinsicWOChain(N);
}
@@ -1389,297 +1304,217 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Inp = Op, Res;
-//
-// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
-// to define these instructions.
-//
-bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ switch (ConstraintID) {
+ default:
return true;
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_o: // Offsetable.
+ case InlineAsm::Constraint_v: // Not offsetable.
+ case InlineAsm::Constraint_m: // Memory.
+ if (SelectAddrFI(Inp, Res))
+ OutOps.push_back(Res);
+ else
+ OutOps.push_back(Inp);
+ break;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
-}
-
-bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
}
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG &DAG = *CurDAG;
+ std::vector<SDNode*> Nodes;
+ for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
+ Nodes.push_back(I);
+
+ // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ // This may not be the right thing for all targets, so do it here.
+ for (auto I: Nodes) {
+ if (I->getOpcode() != ISD::OR)
+ continue;
+
+ auto IsZero = [] (const SDValue &V) -> bool {
+ if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
+ return SC->isNullValue();
+ return false;
+ };
+ auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ if (Op.getOpcode() != ISD::SELECT)
+ return false;
+ return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ };
-bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+ SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
+ EVT VT = I->getValueType(0);
+ bool SelN0 = IsSelect0(N0);
+ SDValue SOp = SelN0 ? N0 : N1;
+ SDValue VOp = SelN0 ? N1 : N0;
+
+ if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
+ SDValue SC = SOp.getOperand(0);
+ SDValue SX = SOp.getOperand(1);
+ SDValue SY = SOp.getOperand(2);
+ SDLoc DLS = SOp;
+ if (IsZero(SY)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ } else if (IsZero(SX)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ }
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
}
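
The rewrite above relies on the bitwise identity (or (select c, x, 0), z) == (select c, (or x, z), z), and symmetrically when the zero sits in the true arm. A minimal standalone check of that identity in plain C++ (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Checks the OR/select identity used by PreprocessISelDAG:
//   (or (select c, x, 0), z) == (select c, (or x, z), z)
//   (or (select c, 0, y), z) == (select c, z, (or y, z))
int main() {
  for (int c = 0; c <= 1; ++c)
    for (uint32_t x : {0u, 1u, 0x0F0Fu, 0xFFFFFFFFu})
      for (uint32_t z : {0u, 7u, 0xF0F0F0F0u}) {
        assert(((c ? x : 0u) | z) == (c ? (x | z) : z));
        assert(((c ? 0u : x) | z) == (c ? z : (x | z)));
      }
  return 0;
}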
-
-bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
+void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ auto &HFI = *HST.getFrameLowering();
+ if (!HFI.needsAligna(*MF))
+ return;
+
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineBasicBlock *EntryBB = MF->begin();
+ unsigned AR = FuncInfo->CreateReg(MVT::i32);
+ unsigned MaxA = MFI->getMaxAlignment();
+ auto &HII = *HST.getInstrInfo();
+ BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+ .addImm(MaxA);
+ MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
-
-bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
-
- if (Addr.getOpcode() != ISD::ADD) {
- return(SelectADDRriS11_2(Addr, Base, Offset));
- }
-
- return SelectADDRriS11_2(Addr, Base, Offset);
+// Match a frame index that can be used in an addressing mode.
+bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
+ if (N.getOpcode() != ISD::FrameIndex)
+ return false;
+ auto &HFI = *HST->getFrameLowering();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
+ return false;
+ R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ return true;
}
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
+inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, false);
}
-bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
- SDValue &R2) {
- if (Addr.getOpcode() == ISD::FrameIndex) return false;
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (Addr.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- if (isInt<13>(CN->getSExtValue()))
- return false; // Let the reg+imm pattern catch this!
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- return true;
- }
-
- R1 = Addr;
-
- return true;
+inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, true);
}
-
-// Handle generic address case. It is accessed from inlined asm =m constraints,
-// which could have any kind of pointer.
-bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
+ bool UseGP) {
+ switch (N.getOpcode()) {
+ case ISD::ADD: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned GAOpc = N0.getOpcode();
+ if (UseGP && GAOpc != HexagonISD::CONST32_GP)
+ return false;
+ if (!UseGP && GAOpc != HexagonISD::CONST32)
+ return false;
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue Addr = N0.getOperand(0);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
+ if (GA->getOpcode() == ISD::TargetGlobalAddress) {
+ uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
+ N.getValueType(), NewOff);
+ return true;
+ }
+ }
+ }
+ break;
}
-
- if (Addr.getOpcode() == ISD::ADD) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ case HexagonISD::CONST32:
+ // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
+ // want in the instruction.
+ if (!UseGP)
+ R = N.getOperand(0);
+ return !UseGP;
+ case HexagonISD::CONST32_GP:
+ if (UseGP)
+ R = N.getOperand(0);
+ return UseGP;
+ default:
+ return false;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ return false;
}
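
The ISD::ADD case of SelectGlobalAddress folds a constant addend into the global's offset, so the addition is resolved at selection time rather than with an extra add instruction. A hedged sketch of just that arithmetic (GlobalAddr is a hypothetical stand-in for GlobalAddressSDNode's (global, offset) pair):

#include <cstdint>

// Folding sketch: ADD(global+Off, C) becomes global+(Off+C), with the
// addend sign-extended as in the patch's getSExtValue() use.
struct GlobalAddr {
  const void *GV;   // the global value
  uint64_t Off;     // byte offset into it
};

GlobalAddr foldConstantOffset(GlobalAddr GA, int64_t C) {
  return {GA.GV, GA.Off + (uint64_t)C};
}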
-
-bool HexagonDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
-
- switch (ConstraintCode) {
- case 'o': // Offsetable.
- case 'v': // Not offsetable.
- default: return true;
- case 'm': // Memory.
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
+ unsigned FromBits, SDValue &Src) {
+ unsigned Opc = Val.getOpcode();
+ switch (Opc) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ SDValue const &Op0 = Val.getOperand(0);
+ EVT T = Op0.getValueType();
+ if (T.isInteger() && T.getSizeInBits() == FromBits) {
+ Src = Op0;
return true;
+ }
break;
}
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
-}
-
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- UseCount++;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ if (Val.getOperand(0).getValueType().isInteger()) {
+ VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
+ if (T->getVT().getSizeInBits() == FromBits) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check if this is an AND with "FromBits" of lower bits set to 1.
+ uint64_t FromMask = (1 << FromBits) - 1;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
}
-
- return (UseCount <= 1);
-
-}
-
-//===--------------------------------------------------------------------===//
-// Return 'true' if use count of the global address is below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
- assert(N->getOpcode() == ISD::TargetGlobalAddress &&
- "Expecting a target global address");
-
- // Always try to fold the address.
- if (TM.getOptLevel() == CodeGenOpt::Aggressive)
- return true;
-
- GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
- DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
- GlobalAddressUseCountMap.find(GA->getGlobal());
-
- if (GI == GlobalAddressUseCountMap.end())
- return false;
-
- return GI->second <= MaxNumOfUsesForConstExtenders;
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the non-GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, false);
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, true);
-}
-
-//===--------------------------------------------------------------------===//
-// Fold offset of the global address if number of uses are below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
- bool ShouldLookForGP) {
- if (N.getOpcode() == ISD::ADD) {
- SDValue N0 = N.getOperand(0);
- SDValue N1 = N.getOperand(1);
- if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
- (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
- GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
-
- if (Const && GA &&
- (GA->getOpcode() == ISD::TargetGlobalAddress)) {
- if ((N0.getOpcode() == HexagonISD::CONST32) &&
- !hasNumUsesBelowThresGA(GA))
- return false;
- R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
- SDLoc(Const),
- N.getValueType(),
- GA->getOffset() +
- (uint64_t)Const->getSExtValue());
+ case ISD::OR:
+ case ISD::XOR: {
+ // OR/XOR with the lower "FromBits" bits set to 0.
+ uint64_t FromMask = (1 << FromBits) - 1;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(0);
return true;
}
}
}
+ default:
+ break;
+ }
return false;
}
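
isValueExtension accepts an AND whose mask has exactly the low FromBits bits set (a zero-extension) and an OR/XOR whose constant has those bits clear (the low bits pass through unchanged). A small sketch of the two constant tests (illustrative; assumes FromBits < 32, matching the patch's 32-bit shift of 1):

#include <cstdint>

// Mask with ones in exactly the low FromBits positions.
static inline uint64_t lowMask(unsigned FromBits) {
  return (1u << FromBits) - 1;
}
// The ISD::AND case: masking with lowMask zero-extends a FromBits value.
bool isZextMask(uint64_t C, unsigned FromBits) {
  return C == lowMask(FromBits);
}
// The ISD::OR/ISD::XOR case: a constant with clear low bits leaves the
// low FromBits bits of the other operand intact.
bool keepsLowBits(uint64_t C, unsigned FromBits) {
  return (C & lowMask(FromBits)) == 0;
}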
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index ef5d6b97fd6f..ed5676c1fbb6 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -43,11 +43,48 @@ using namespace llvm;
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
- cl::desc("Control jump table emission on Hexagon target"));
+ cl::desc("Control jump table emission on Hexagon target"));
+
+static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon SDNode scheduling"));
+
+static cl::opt<bool> EnableFastMath("ffast-math",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Fast Math processing"));
+
+static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
+ cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Set minimum jump tables"));
+
+static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
+ cl::Hidden, cl::ZeroOrMore, cl::init(8),
+ cl::desc("Max #stores to inline memset"));
+
+static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memset"));
+
namespace {
class HexagonCCState : public CCState {
- int NumNamedVarArgParams;
+ unsigned NumNamedVarArgParams;
public:
HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -56,7 +93,7 @@ public:
: CCState(CC, isVarArg, MF, locs, C),
NumNamedVarArgParams(NumNamedVarArgParams) {}
- int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+ unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
}
@@ -97,11 +134,7 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
HexagonCCState &HState = static_cast<HexagonCCState &>(State);
- // NumNamedVarArgParams can not be zero for a VarArg function.
- assert((HState.getNumNamedVarArgParams() > 0) &&
- "NumNamedVarArgParams is not bigger than zero.");
-
- if ((int)ValNo < HState.getNumNamedVarArgParams()) {
+ if (ValNo < HState.getNumNamedVarArgParams()) {
// Deal with named arguments.
return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -111,9 +144,8 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
if (ArgFlags.isByVal()) {
// If pass-by-value, the size allocated on stack is decided
// by ArgFlags.getByValSize(), not by the size of LocVT.
- assert ((ArgFlags.getByValSize() > 8) &&
- "ByValSize must be bigger than 8 bytes");
- ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ ofst = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
@@ -148,9 +180,8 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
if (ArgFlags.isByVal()) {
// Passed on stack.
- assert ((ArgFlags.getByValSize() > 8) &&
- "ByValSize must be bigger than 8 bytes");
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
@@ -164,6 +195,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -188,7 +225,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
};
- if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -213,7 +250,7 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
static const MCPhysReg RegList2[] = {
Hexagon::R1, Hexagon::R3
};
- if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -239,6 +276,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -301,9 +344,10 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -351,8 +395,13 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
+bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ // If either no tail call or told not to tail call at all, don't.
+ if (!CI->isTailCall() || HTM.Options.DisableTailCalls)
+ return false;
-
+ return true;
+}
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
@@ -404,8 +453,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ bool doesNotReturn = CLI.DoesNotReturn;
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ MachineFunction &MF = DAG.getMachineFunction();
// Check for varargs.
int NumNamedVarArgParams = -1;
@@ -430,42 +481,39 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), NumNamedVarArgParams);
- if (NumNamedVarArgParams > 0)
+ if (isVarArg)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+ if (DAG.getTarget().Options.DisableTailCalls)
+ isTailCall = false;
- if(isTailCall) {
- bool StructAttrFlag =
- DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ if (isTailCall) {
+ bool StructAttrFlag = MF.getFunction()->hasStructRetAttr();
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet,
StructAttrFlag,
Outs, OutVals, Ins, DAG);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isMemLoc()) {
isTailCall = false;
break;
}
}
- if (isTailCall) {
- DEBUG(dbgs () << "Eligible for Tail Call\n");
- } else {
- DEBUG(dbgs () <<
- "Argument must be passed on stack. Not eligible for Tail Call\n");
- }
+ DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n"
+ : "Argument must be passed on stack. "
+ "Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- SDValue StackPtr =
- DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
+ auto &HRI = *Subtarget.getRegisterInfo();
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(),
+ getPointerTy());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -478,6 +526,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
default:
// Loc info must be one of Full, SExt, ZExt, or AExt.
llvm_unreachable("Unknown loc info!");
+ case CCValAssign::BCvt:
case CCValAssign::Full:
break;
case CCValAssign::SExt:
@@ -493,41 +542,38 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.isMemLoc()) {
unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
-
+ SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
+ StackPtr.getValueType());
+ MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.

- MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
- // The argument is not passed by value. "Arg" is a buildin type. It is
- // not a pointer.
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false,
- 0));
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+ SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
+ false, 0);
+ MemOpChains.push_back(S);
}
continue;
}
// Arguments that can be passed on register must be kept at RegsToPass
// vector.
- if (VA.isRegLoc()) {
+ if (VA.isRegLoc())
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- }
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
- if (!MemOpChains.empty()) {
+ if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
- }
- if (!isTailCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true),
- dl);
+ if (!isTailCall) {
+ SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true);
+ Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ }
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
@@ -540,10 +586,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- }
-
- // For tail calls lower the arguments to the 'real' stack slot.
- if (isTailCall) {
+ } else {
+ // For tail calls lower the arguments to the 'real' stack slot.
+ //
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
@@ -558,7 +603,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- InFlag =SDValue();
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -567,8 +612,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (flag_aligned_memcpy) {
const char *MemcpyName =
"__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
- Callee =
- DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
flag_aligned_memcpy = false;
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
@@ -590,19 +634,21 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
}
- if (InFlag.getNode()) {
+ if (InFlag.getNode())
Ops.push_back(InFlag);
- }
- if (isTailCall)
+ if (isTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
+ }
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
+ int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3;
+ Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
@@ -616,7 +662,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD)
- return false;
+ return false;
if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
isInc = (Ptr->getOpcode() == ISD::ADD);
@@ -688,8 +734,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
switch (Node->getOpcode()) {
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
@@ -697,7 +742,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
--NumOps; // Ignore the flag operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- if (FuncInfo->hasClobberLR())
+ if (FuncInfo.hasClobberLR())
break;
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
@@ -720,11 +765,9 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// Check it to be lr
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
if (Reg == QRI->getRARegister()) {
- FuncInfo->setHasClobberLR(true);
+ FuncInfo.setHasClobberLR(true);
break;
}
}
@@ -765,10 +808,10 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
}
- SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl,
getPointerTy(), TargetJT);
SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, dl, MVT::i32));
SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
ShiftIndex);
SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
@@ -783,44 +826,27 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
SDLoc dl(Op);
- unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
+ assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
- // Get a reference to the stack pointer.
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ unsigned A = AlignConst->getSExtValue();
+ auto &HFI = *Subtarget.getFrameLowering();
+ // "Zero" means natural stack alignment.
+ if (A == 0)
+ A = HFI.getStackAlignment();
- // Subtract the dynamic size from the actual stack size to
- // obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+ DEBUG({
+ dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ Size.getNode()->dump(&DAG);
+ dbgs() << "\n";
+ });
- //
- // For Hexagon, the outgoing memory arguments area should be on top of the
- // alloca area on the stack i.e., the outgoing memory arguments should be
- // at a lower address than the alloca area. Move the alloca area down the
- // stack by adding back the space reserved for outgoing arguments to SP
- // here.
- //
- // We do not know what the size of the outgoing args is at this point.
- // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
- // stack pointer. We patch this instruction with the correct, known
- // offset in emitPrologue().
- //
- // Use a placeholder immediate (zero) for now. This will be patched up
- // by emitPrologue().
- SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
- MVT::i32,
- Sub,
- DAG.getConstant(0, MVT::i32));
-
- // The Sub result contains the new stack start address, so it
- // must be placed in the stack pointer register.
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
-
- SDValue Ops[2] = { ArgAdjust, CopyChain };
- return DAG.getMergeValues(Ops, dl);
+ SDValue AC = DAG.getConstant(A, dl, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
}
SDValue
@@ -836,9 +862,7 @@ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
-
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -875,7 +899,7 @@ const {
RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (RegVT == MVT::i64) {
+ } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
@@ -927,7 +951,7 @@ const {
HEXAGON_LRFP_SIZE +
CCInfo.getNextStackOffset(),
true);
- FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ FuncInfo.setVarArgsFrameIndex(FrameIndex);
}
return Chain;
@@ -946,6 +970,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
false, 0);
}
+// Creates a SPLAT instruction for a constant value VAL.
+static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) {
+ if (VT.getSimpleVT() == MVT::v4i8)
+ return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
+
+ if (VT.getSimpleVT() == MVT::v4i16)
+ return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+
+ return SDValue();
+}
+
+static bool isSExtFree(SDValue N) {
+ // A sign-extend of a truncate of a sign-extend is free.
+ if (N.getOpcode() == ISD::TRUNCATE &&
+ N.getOperand(0).getOpcode() == ISD::AssertSext)
+ return true;
+ // We have sign-extended loads.
+ if (N.getOpcode() == ISD::LOAD)
+ return true;
+ return false;
+}
+
+SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue InpVal = Op.getOperand(0);
+ if (isa<ConstantSDNode>(InpVal)) {
+ uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
+ return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
+ }
+ SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
+}
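
LowerCTPOP folds a constant operand at compile time; otherwise it emits the i32-valued POPCOUNT node and zero-extends it to the i64 result. A scalar sketch of the same computation (illustrative; uses a compiler builtin in place of the Hexagon node):

#include <cstdint>

// ctpop64: i32 popcount result widened back to i64, mirroring
// POPCOUNT + ZERO_EXTEND in LowerCTPOP.
uint64_t ctpop64(uint64_t V) {
  uint32_t Pop = (uint32_t)__builtin_popcountll(V); // i32 result
  return (uint64_t)Pop;                             // zero-extend to i64
}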
+
+SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Cmp = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
+
+ EVT VT = Op.getValueType();
+ EVT LHSVT = LHS.getValueType();
+ EVT RHSVT = RHS.getValueType();
+
+ if (LHSVT == MVT::v2i16) {
+ assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
+ unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
+ SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
+ SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
+ return SC;
+ }
+
+ // Treat all other vector types as legal.
+ if (VT.isVector())
+ return Op;
+
+ // Equals and not equals should use sign-extend, not zero-extend, since
+ // we can represent small negative values in the compare instructions.
+ // The LLVM default is to use zero-extend arbitrarily in these cases.
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
+ (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (C && C->getAPIntValue().isNegative()) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ if (isSExtFree(LHS) || isSExtFree(RHS)) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ }
+ return SDValue();
+}
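
The EQ/NE special case above exists because the compare instructions can encode small negative immediates directly, and zero-extension destroys that property. A tiny demonstration (illustrative only):

#include <cassert>
#include <cstdint>

// An i8 holding -1 zero-extends to 0xFF (a large unsigned operand) but
// sign-extends to -1, which still fits a small signed immediate field.
int main() {
  int8_t a = -1;
  assert((uint32_t)(uint8_t)a == 0xFFu); // zero-extended
  assert((int32_t)a == -1);              // sign-extended
  return 0;
}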
+
+SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
+ const {
+ SDValue PredOp = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
+ EVT OpVT = Op1.getValueType();
+ SDLoc DL(Op);
+
+ if (OpVT == MVT::v2i16) {
+ SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
+ SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
+ SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
+ SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
+ return TR;
+ }
+
+ return SDValue();
+}
+
+// Handle only specific vector loads.
+SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+ SDValue Result;
+ SDValue Base = LoadNode->getBasePtr();
+ ISD::LoadExtType Ext = LoadNode->getExtensionType();
+ unsigned Alignment = LoadNode->getAlignment();
+ SDValue LoadChain;
+
+ if(Ext == ISD::NON_EXTLOAD)
+ Ext = ISD::ZEXTLOAD;
+
+ if (VT == MVT::v4i16) {
+ if (Alignment == 2) {
+ SDValue Loads[4];
+ // Base load.
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base+2 load.
+ SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base and base+2.
+ SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
+ SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
+ // Base + 4.
+ Increment = DAG.getConstant(4, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base + 6.
+ Increment = DAG.getConstant(6, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base+4 and base+6.
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
+ // Combine to i64. This could be optimised out later if we can
+ // affect reg allocation of this code.
+ Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
+ LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Loads[0].getValue(1), Loads[1].getValue(1),
+ Loads[2].getValue(1), Loads[3].getValue(1));
+ } else {
+ // Perform default type expansion.
+ Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->isVolatile(), LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(), LoadNode->getAlignment());
+ LoadChain = Result.getValue(1);
+ }
+ } else
+ llvm_unreachable("Custom lowering unsupported load");
+
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ // Since we pretend to lower a load, we need the original chain
+ // info attached to the result.
+ SDValue Ops[] = { Result, LoadChain };
+
+ return DAG.getMergeValues(Ops, DL);
+}
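
The alignment-2 path above widens the v4i16 load into four zero-extending halfword loads, ORs them pairwise into two i32 words, and concatenates those with HexagonISD::COMBINE. A byte-level sketch of that arithmetic (hypothetical helper, little-endian, not the patch's code):

#include <cstdint>
#include <cstring>

uint64_t loadV4i16Align2(const uint8_t *P) {
  uint16_t H[4];
  for (int I = 0; I < 4; ++I)
    std::memcpy(&H[I], P + 2 * I, 2);          // base, +2, +4, +6
  uint32_t Lo = (uint32_t)H[1] << 16 | H[0];   // SHL 16, then OR
  uint32_t Hi = (uint32_t)H[3] << 16 | H[2];
  return (uint64_t)Hi << 32 | Lo;              // COMBINE(Hi, Lo)
}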
+
+
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -958,15 +1168,15 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else
Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
CP->getAlignment());
- return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res);
+ return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
}
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -976,29 +1186,28 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(4, MVT::i32);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- TRI->getFrameRegister(), VT);
+ HRI.getFrameRegister(), VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
@@ -1021,15 +1230,29 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
SDLoc dl(Op);
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- const HexagonTargetObjectFile &TLOF =
- static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ const HexagonTargetObjectFile *TLOF =
+ static_cast<const HexagonTargetObjectFile *>(
+ getTargetMachine().getObjFileLowering());
+ if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
}
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
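
Promotion here means the legalizer routes short-vector loads and stores through the wider integer type; element accesses then become shifts and masks on that integer. A byte-level illustration for v4i8 over i32 (assumes little-endian):

#include <cstdint>
#include <cstring>

// The promoted i32 load carries the same four bytes; element I of the
// original v4i8 is recovered with a shift and mask.
uint8_t loadV4i8Element(const uint8_t *P, unsigned I) {
  uint32_t W;
  std::memcpy(&W, P, 4);          // promoted i32 load
  return (W >> (8 * I)) & 0xFF;   // extract element I
}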
+
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1042,465 +1265,971 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
-HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
- : TargetLowering(targetmachine),
- TM(targetmachine) {
+HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &STI)
+ : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
+ Subtarget(STI) {
+ bool IsV4 = !Subtarget.hasV5TOps();
+ auto &HRI = *Subtarget.getRegisterInfo();
+
+ setPrefLoopAlignment(4);
+ setPrefFunctionAlignment(4);
+ setMinFunctionAlignment(2);
+ setInsertFencesForAtomic(false);
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
+
+ if (EnableHexSDNodeSched)
+ setSchedulingPreference(Sched::VLIW);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
+ MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
+ MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
+ MaxStoresPerMemset = MaxStoresPerMemsetCL;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
- const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
+ //
+ // Set up register classes.
+ //
- // Set up the register classes.
- addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
+ addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
+ addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
if (Subtarget.hasV5TOps()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
- addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ //
+ // Handling of scalar operations.
+ //
+ // All operations default to "legal", except:
+ // - indexed loads and stores (pre-/post-incremented),
+ // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
+ // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
+ // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
+ // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
+ // which default to "expand" for at least one type.
+
+ // Misc operations.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
+
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- computeRegisterProperties();
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
- // Align loop entry
- setPrefLoopAlignment(4);
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
- // Limits for inline expansion of memcpy/memmove
- MaxStoresPerMemcpy = 6;
- MaxStoresPerMemmove = 6;
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- //
- // Library calls for unsupported operations
- //
+ if (EmitJumpTables)
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  // Increase the jump table cutover to 5 (was 4).
+ setMinimumJumpTableEntries(MinimumJumpTables);
+
+ // Hexagon has instructions for add/sub with carry. The problem with
+ // modeling these instructions is that they produce 2 results: Rdd and Px.
+ // To model the update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't have
+ // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
- setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
- setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ // Only add and sub that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
- setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
- setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+ setOperationAction(ISD::CTLZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+
+ // In V5, popcount can count # of 1s in i64 but returns i32.
+ // On V4 it will be expanded (set later).
+ setOperationAction(ISD::CTPOP, MVT::i8, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+  // We custom-lower i64 MUL so that it is not considered a legal
+  // operation. There is a pattern that will match i64 mul and transform
+  // it into a series of instructions.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ for (unsigned IntExpOp :
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
+ ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
+ ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ setOperationAction(IntExpOp, MVT::i32, Expand);
+ setOperationAction(IntExpOp, MVT::i64, Expand);
+ }
- setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
- setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+ for (unsigned FPExpOp :
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+ ISD::FPOW, ISD::FCOPYSIGN}) {
+ setOperationAction(FPExpOp, MVT::f32, Expand);
+ setOperationAction(FPExpOp, MVT::f64, Expand);
+ }
- setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
+ // No extending loads from i32.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ // Turn FP extload into load/fextend.
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
- setOperationAction(ISD::SREM, MVT::i64, Expand);
+ // Expand BR_CC and SELECT_CC for all integer and fp types.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ //
+ // Handling of vector operations.
+ //
- setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
+  // Custom-lower v4i16 loads only. Let v4i16 stores be promoted for
+  // now.
+ promoteLdStType(MVT::v4i8, MVT::i32);
+ promoteLdStType(MVT::v2i16, MVT::i32);
+ promoteLdStType(MVT::v8i8, MVT::i64);
+ promoteLdStType(MVT::v2i32, MVT::i64);
+
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
+
+ // Set the action for vector operations to "expand", then override it with
+ // either "custom" or "legal" for specific cases.
+ static unsigned VectExpOps[] = {
+ // Integer arithmetic:
+ ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
+ ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ // Logical/bit:
+ ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ_ZERO_UNDEF,
+ // Floating point arithmetic/math functions:
+ ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
+ ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
+ ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
+ ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
+ // Misc:
+ ISD::SELECT, ISD::ConstantPool,
+ // Vector:
+ ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
+ ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
+ ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE
+ };
- setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::UREM, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (unsigned VectExpOp : VectExpOps)
+ setOperationAction(VectExpOp, VT, Expand);
- setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+ // Expand all extended loads and truncating stores:
+ for (MVT TargetVT : MVT::vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
- setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
- setOperationAction(ISD::FDIV, MVT::f32, Expand);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
- setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
- setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ // Types natively supported:
+ for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
+ MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
+ MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
+
+ setOperationAction(ISD::ADD, NativeVT, Legal);
+ setOperationAction(ISD::SUB, NativeVT, Legal);
+ setOperationAction(ISD::MUL, NativeVT, Legal);
+ setOperationAction(ISD::AND, NativeVT, Legal);
+ setOperationAction(ISD::OR, NativeVT, Legal);
+ setOperationAction(ISD::XOR, NativeVT, Legal);
+ }
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ // Subtarget-specific operation actions.
+ //
if (Subtarget.hasV5TOps()) {
- // Hexagon V5 Support.
- setOperationAction(ISD::FADD, MVT::f32, Legal);
- setOperationAction(ISD::FADD, MVT::f64, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ } else { // V4
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
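V4 has no population-count instruction for these types, so Expand makes the legalizer synthesize one. A minimal standalone sketch of the classic shift-and-add expansion (an illustration of the technique; the legalizer's exact output differs):

#include <cstdint>

static uint32_t popcount32(uint32_t X) {
  X = X - ((X >> 1) & 0x55555555u);                 // 2-bit partial sums
  X = (X & 0x33333333u) + ((X >> 2) & 0x33333333u); // 4-bit partial sums
  X = (X + (X >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (X * 0x01010101u) >> 24;                   // accumulate the bytes
}

int main() { return popcount32(0xF0F01234u) == 13 ? 0 : 1; }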
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
+ // Expand these operations for both f32 and f64:
+ for (unsigned FPExpOpV4 :
+ {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
+ setOperationAction(FPExpOpV4, MVT::f32, Expand);
+ setOperationAction(FPExpOpV4, MVT::f64, Expand);
+ }
- setOperationAction(ISD::FNEG, MVT::f32, Legal);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- } else {
+ for (ISD::CondCode FPExpCCV4 :
+ {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
+ ISD::SETUO, ISD::SETO}) {
+ setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
+ setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
+ }
+ }
- // Expand fp<->uint.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ // Handling of indexed loads/stores: default is "expand".
+ //
+ for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal);
+ setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
+ }
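Post-increment addressing lets a single instruction both load a value and advance the base register (Hexagon's "r1 = memw(r0++#4)" form), which is why POST_INC is Legal here. A standalone model of the semantics, with hypothetical names:

#include <cstddef>
#include <cstdint>

// One "instruction": load from Base, then write the bumped address back.
static inline uint32_t load_post_inc(const uint32_t *&Base, ptrdiff_t Inc) {
  uint32_t V = *Base;
  Base += Inc;
  return V;
}

int main() {
  uint32_t Buf[4] = {1, 2, 3, 4};
  const uint32_t *P = Buf;
  uint32_t Sum = 0;
  for (int i = 0; i != 4; ++i)
    Sum += load_post_inc(P, 1); // the pointer bump is folded into the load
  return Sum == 10 ? 0 : 1;
}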
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ computeRegisterProperties(&HRI);
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ //
+ // Library calls for unsupported operations
+ //
+ bool FastMath = EnableFastMath;
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ if (IsV4) {
+ // Handle single-precision floating point operations on V4.
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
+ } else {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ }
+ }
+ // This is the only fast library function for double-precision sqrt.
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
+
+ // Prefix is: nothing for "slow-math",
+ // "fast_" for V4 fast-math and for V5+ fast-math double-precision.
+ // (For now, the "fast" and "fast2" variants are kept separate.)
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
+ // On V5 with fast-math, calling __hexagon_fast2_divsf3 would also be ok.
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
+ } else {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ }
+
+ if (Subtarget.hasV5TOps()) {
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
+ } else {
+ // V4
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
-
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
-
setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
-
- setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
- setOperationAction(ISD::FADD, MVT::f64, Expand);
-
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
-
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
-
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
-
setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
-
setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
-
setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
-
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
-
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ }
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ // These cause problems when the shift amount is non-constant.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+}
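With these libcall names cleared, a 128-bit shift by a non-constant amount must be expanded inline instead of calling out. A sketch of the usual two-word expansion, under the assumption that the amount is in [0, 127] (our types and names):

#include <cstdint>

struct U128 { uint64_t Lo, Hi; };

static U128 shl128(U128 V, unsigned Amt) {
  if (Amt == 0) return V;
  if (Amt >= 64) return {0, V.Lo << (Amt - 64)};       // low word emptied
  return {V.Lo << Amt, (V.Hi << Amt) | (V.Lo >> (64 - Amt))};
}

int main() {
  U128 R = shl128({1, 0}, 100);
  return (R.Lo == 0 && R.Hi == (1ULL << 36)) ? 0 : 1;
}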
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((HexagonISD::NodeType)Opcode) {
+ case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
+ case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND";
+ case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
+ case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
+ case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::CALLR: return "HexagonISD::CALLR";
+ case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
+ case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
+ case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CP: return "HexagonISD::CP";
+ case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
+ case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
+ case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
+ case HexagonISD::FCONST32: return "HexagonISD::FCONST32";
+ case HexagonISD::INSERT: return "HexagonISD::INSERT";
+ case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
+ case HexagonISD::JT: return "HexagonISD::JT";
+ case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
+ case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD";
+ case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
+ case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
+ case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
+ case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
+ case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
+ case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
+ case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
+ case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
+ case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
+ case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
+ case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
+ case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
+ case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
+ case HexagonISD::VSPLATB: return "HexagonISD::VSPLATB";
+ case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
+ case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
+ case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
+ case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
+ case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
+ case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
+ case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::OP_END: break;
+ }
+ return nullptr;
+}
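Note the switch above has no default case and ends at OP_END, so -Wswitch can flag any node added to the enum without a name, while unknown (target-independent) opcodes fall through to nullptr. A tiny standalone mirror of the pattern, with demo names only:

#include <cstdio>

namespace DemoISD {
enum NodeType : unsigned { OP_BEGIN = 1000, FOO = OP_BEGIN, BAR, OP_END };
}

static const char *getNodeName(unsigned Opcode) {
  switch ((DemoISD::NodeType)Opcode) {
  case DemoISD::FOO:    return "DemoISD::FOO";
  case DemoISD::BAR:    return "DemoISD::BAR";
  case DemoISD::OP_END: break; // not a real node
  }
  return nullptr;              // opcode outside the target range
}

int main() {
  std::printf("%s\n", getNodeName(DemoISD::BAR)); // prints DemoISD::BAR
  return getNodeName(42) == nullptr ? 0 : 1;
}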
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple())
+ return false;
+ return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
+}
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple())
+ return false;
+ return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
+}
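The predicate reflects that an i64 lives in a 32-bit register pair, so truncating to i32 is just reading the low subregister; no instruction is needed. Illustrated in plain C++ (our example, not backend code):

#include <cstdint>

static uint32_t truncate_i64_to_i32(uint64_t X) {
  return (uint32_t)X; // selects the low half of the register pair
}

int main() { return truncate_i64_to_i32(0xDEADBEEF00000001ULL) == 1 ? 0 : 1; }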
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+// shouldExpandBuildVectorWithShuffles - Should we expand the given
+// build vector with shuffles?
+bool
+HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
- setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ // Hexagon vector shuffles operate on element sizes of bytes or halfwords.
+ EVT EltVT = VT.getVectorElementType();
+ int EltBits = EltVT.getSizeInBits();
+ if ((EltBits != 8) && (EltBits != 16))
+ return false;
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setOperationAction(ISD::MUL, MVT::f32, Expand);
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
+// V2 are the two vectors to select data from, V3 is the permutation.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ if (SVN->isSplat()) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+ }
+ return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
+ }
- setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
- setOperationAction(ISD::SUB, MVT::f64, Expand);
+ // FIXME: We need to support more general vector shuffles; see how the
+ // ARM backend handles the general case. If the shuffle is not directly
+ // supported and it has 4 elements, the PerfectShuffle-generated table
+ // could be used to synthesize it from other shuffles. For now, let
+ // Expand handle these.
+ return SDValue();
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setOperationAction(ISD::SUB, MVT::f32, Expand);
+}
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+// If the BUILD_VECTOR has the same base element repeated several times,
+// report true.
+static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
+ unsigned NElts = BVN->getNumOperands();
+ SDValue V0 = BVN->getOperand(0);
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ for (unsigned i = 1, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i) != V0)
+ return false;
+ }
+ return true;
+}
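A standalone model of this predicate over a plain array, with SDValue comparison replaced by integer equality for the sketch:

#include <algorithm>
#include <cstdint>

static bool isCommonSplat(const int32_t *Ops, unsigned N) {
  // True when every element equals element 0, i.e. the vector is a splat.
  return std::all_of(Ops + 1, Ops + N,
                     [&](int32_t V) { return V == Ops[0]; });
}

int main() {
  int32_t A[4] = {7, 7, 7, 7};
  int32_t B[4] = {7, 7, 8, 7};
  return (isCommonSplat(A, 4) && !isCommonSplat(B, 4)) ? 0 : 1;
}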
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
+// <VT> = SHL/SRA/SRL <VT> by <i32>, i.e. by a common splat amount.
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
+ BuildVectorSDNode *BVN = nullptr;
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDValue V3;
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V2;
+ else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V1;
+ else
+ return SDValue();
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ SDValue CommonSplat = BVN->getOperand(0);
+ SDValue Result;
- setOperationAction(ISD::FABS, MVT::f32, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FNEG, MVT::f32, Expand);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ if (VT.getSimpleVT() == MVT::v4i16) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else if (VT.getSimpleVT() == MVT::v2i32) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else {
+ return SDValue();
}
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+}
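What the conversion buys: every lane is shifted by one common amount, which is exactly what the VSRAH/VSHLH-style nodes provide. A scalar model of the v4i16 arithmetic-shift case (semantics only, names ours):

#include <cstdint>

static void vsrah(int16_t Lanes[4], unsigned Amt) {
  for (int i = 0; i != 4; ++i)
    Lanes[i] = (int16_t)(Lanes[i] >> Amt); // one splat amount for all lanes
}

int main() {
  int16_t V[4] = {-8, 8, 16, -16};
  vsrah(V, 2);
  return (V[0] == -2 && V[1] == 2 && V[2] == 4 && V[3] == -4) ? 0 : 1;
}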
- setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+SDValue
+HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+ unsigned Size = VT.getSizeInBits();
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // A vector larger than 64 bits cannot be represented in Hexagon.
+ // Expand will split the vector.
+ if (Size > 64)
+ return SDValue();
- // Turn FP extload into load/fextend.
- for (MVT VT : MVT::fp_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- // Hexagon has a i1 sign extending load.
- for (MVT VT : MVT::integer_valuetypes())
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned NElts = BVN->getNumOperands();
+
+ // Try to generate a SPLAT instruction.
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
+ (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
+ (32 - SplatBitSize));
+ return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32));
+ }
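The SextVal computation sign-extends the SplatBitSize-bit splat value by shifting it to the top of a 32-bit word and arithmetic-shifting it back down. A worked standalone version of the same arithmetic:

#include <cstdint>

static int32_t signExtend(uint32_t Bits, unsigned Width) {
  return (int32_t)(Bits << (32 - Width)) >> (32 - Width);
}

int main() {
  // An 8-bit splat of 0xFF is the value -1 once sign-extended.
  return (signExtend(0xFF, 8) == -1 && signExtend(0x7F, 8) == 127) ? 0 : 1;
}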
- // Custom legalize GlobalAddress nodes into CONST32.
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- // Truncate action?
- setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+ // Try to generate COMBINE to build v2i32 vectors.
+ if (VT.getSimpleVT() == MVT::v2i32) {
+ SDValue V0 = BVN->getOperand(0);
+ SDValue V1 = BVN->getOperand(1);
+
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = DAG.getConstant(0, dl, MVT::i32);
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = DAG.getConstant(0, dl, MVT::i32);
+
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
+ // If the element isn't a constant, it is in a register:
+ // generate a COMBINE Register Register instruction.
+ if (!C0 || !C1)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+
+ // If one of the operands is an 8-bit integer constant, generate
+ // a COMBINE Immediate Immediate instruction.
+ if (isInt<8>(C0->getSExtValue()) ||
+ isInt<8>(C1->getSExtValue()))
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+ }
- // Hexagon doesn't have sext_inreg, replace them with shl/sra.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ // Try to generate an S2_packhl to build v2i16 vectors.
+ if (VT.getSimpleVT() == MVT::v2i16) {
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
+ // If the element isn't a constant, it is in a register:
+ // generate an S2_packhl instruction.
+ if (!Cst) {
+ SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
+ BVN->getOperand(1), BVN->getOperand(0));
+
+ return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ pack);
+ }
+ }
+ }
- // Hexagon has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ // In the general case, generate a CONST32 or a CONST64 for the constant
+ // part of the vector, and insert any non-constant elements afterwards.
+ uint64_t Res = 0;
+ unsigned EltSize = Size / NElts;
+ SDValue ConstVal;
+ uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
+ bool HasNonConstantElements = false;
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+ // Hexagon's combine, const64, etc. expect big-endian order.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (Operand.getOpcode() == ISD::UNDEF)
+ continue;
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ int64_t Val = 0;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
+ Val = Cst->getSExtValue();
+ else
+ HasNonConstantElements = true;
- // Lower SELECT_CC to SETCC and SELECT.
- setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ Val &= Mask;
+ Res = (Res << EltSize) | Val;
+ }
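A standalone model of this packing loop: the operands are walked in reverse (OpIdx) so that element 0 ends up in the low bits of the packed word. The driver and names below are ours:

#include <cstdint>

static uint64_t packElements(const int64_t *Elts, unsigned NElts,
                             unsigned EltSize) {
  uint64_t Mask = ~uint64_t(0) >> (64 - EltSize);
  uint64_t Res = 0;
  for (unsigned i = 0; i != NElts; ++i) {
    unsigned OpIdx = NElts - i - 1; // highest-numbered element first
    Res = (Res << EltSize) | (uint64_t(Elts[OpIdx]) & Mask);
  }
  return Res;
}

int main() {
  int64_t V[4] = {0x11, 0x22, 0x33, 0x44}; // v4i16 <0x11,0x22,0x33,0x44>
  return packElements(V, 4, 16) == 0x0044003300220011ULL ? 0 : 1;
}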
- if (Subtarget.hasV5TOps()) {
+ if (Size == 64)
+ ConstVal = DAG.getConstant(Res, dl, MVT::i64);
+ else
+ ConstVal = DAG.getConstant(Res, dl, MVT::i32);
+
+ // When there are non-constant operands, add them with INSERT_VECTOR_ELT to
+ // ConstVal, the constant part of the vector.
+ if (HasNonConstantElements) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+ // Hexagon's packed layout here is big-endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (isa<ConstantSDNode>(Operand))
+ // This operand is already in ConstVal.
+ continue;
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
- // We need to make the operation type of SELECT node to be Custom,
- // such that we don't go into the infinite loop of
- // select -> setcc -> select_cc -> select loop.
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ }
+ }
- } else {
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
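The Combined operand built for INSERTRP above appears to encode the field width in the high 32 bits and the bit offset in the low 32 bits; that convention is an inference from this code, not documented API. A trivial encode/decode sketch:

#include <cstdint>

static uint64_t makeControl(uint64_t WidthBits, uint64_t OffsetBits) {
  return (WidthBits << 32) | OffsetBits; // "Shifted | Offset" above
}
static uint64_t widthOf(uint64_t Ctl)  { return Ctl >> 32; }
static uint64_t offsetOf(uint64_t Ctl) { return Ctl & 0xFFFFFFFFull; }

int main() {
  // Inserting a 16-bit element at lane 2 of a v4i16: offset = 2 * 16 bits.
  uint64_t Ctl = makeControl(16, 32);
  return (widthOf(Ctl) == 16 && offsetOf(Ctl) == 32) ? 0 : 1;
}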
- // Hexagon has no select or setcc: expand to SELECT_CC.
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+SDValue
+HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NElts = Op.getNumOperands();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+
+ if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v2i16 to a single v4i16.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
}
- if (EmitJumpTables) {
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
- } else {
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v4i8 to a single v8i8.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
}
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(5);
-
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = Op.getOperand(OpIdx);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-
- // In V4, we have double word add/sub with carry. The problem with
- // modelling this instruction is that it produces 2 results - Rdd and Px.
- // To model update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we dont't
- // have these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTR, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FPOW, MVT::f64, Expand);
- setOperationAction(ISD::FPOW, MVT::f32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
- if (Subtarget.isSubtargetV2()) {
- setExceptionPointerRegister(Hexagon::R20);
- setExceptionSelectorRegister(Hexagon::R21);
- } else {
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
}
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
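The COMBINE-based concat forms one 64-bit vector from two 32-bit halves, with operand 0 of the concat landing in the low word. Modeled standalone (names ours):

#include <cstdint>

static uint64_t combine(uint32_t Hi, uint32_t Lo) {
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  // concat(v2i16 A, v2i16 B) -> v4i16 with A in lanes 0-1, B in lanes 2-3.
  return combine(0xBBBBBBBBu, 0xAAAAAAAAu) == 0xBBBBBBBBAAAAAAAAULL ? 0 : 1;
}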
- // Use the default implementation.
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+SDValue
+HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Idx = Op.getOperand(1);
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ // Constant element number.
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t X = CI->getZExtValue();
+ SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Width, Offset};
+
+ ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width);
+ assert(CW && "Non constant width in LowerEXTRACT_VECTOR");
+
+ SDValue N;
+ MVT SVT = VecVT.getSimpleVT();
+ uint64_t W = CW->getZExtValue();
+
+ if (W == 32) {
+ // Translate this node into EXTRACT_SUBREG.
+ unsigned Subreg = 0;
+
+ if (X == 0)
+ Subreg = Hexagon::subreg_loreg;
+ else if (SVT == MVT::v2i32 && X == 1)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v4i16 && X == 2)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v8i8 && X == 4)
+ Subreg = Hexagon::subreg_hireg;
+ else
+ llvm_unreachable("Bad offset");
+ N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec);
+
+ } else if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
- setMinFunctionAlignment(2);
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
- // Needed for DYNAMIC_STACKALLOC expansion.
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
- setSchedulingPreference(Sched::VLIW);
-}
+ const SDValue Ops[] = {Vec, Combined};
-const char*
-HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
- case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
- case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
- case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
- case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
- case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
- case HexagonISD::BRICC: return "HexagonISD::BRICC";
- case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
- case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
- case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
- case HexagonISD::Hi: return "HexagonISD::Hi";
- case HexagonISD::Lo: return "HexagonISD::Lo";
- case HexagonISD::FTOI: return "HexagonISD::FTOI";
- case HexagonISD::ITOF: return "HexagonISD::ITOF";
- case HexagonISD::CALL: return "HexagonISD::CALL";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
}
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
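A scalar model of the EXTRACTU path above: read Width bits starting at bit Offset out of a packed 64-bit vector register. A sketch of the semantics only, with our names:

#include <cstdint>

static uint64_t extractu(uint64_t Vec, unsigned Width, unsigned Offset) {
  return (Vec >> Offset) & (~uint64_t(0) >> (64 - Width));
}

int main() {
  uint64_t V = 0x0044003300220011ULL;         // v4i16 <0x11,0x22,0x33,0x44>
  return extractu(V, 16, 32) == 0x33 ? 0 : 1; // element 2
}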
-bool
-HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
- EVT MTy1 = EVT::getEVT(Ty1);
- EVT MTy2 = EVT::getEVT(Ty2);
- if (!MTy1.isSimple() || !MTy2.isSimple()) {
- return false;
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
- return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
-}
-bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isSimple() || !VT2.isSimple()) {
- return false;
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
}
- return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
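A scalar model of the INSERT/INSERTRP path above: replace the Width-bit field at bit Offset in a packed vector register with a new value, via mask-and-merge. Names ours, semantics only:

#include <cstdint>

static uint64_t insertBits(uint64_t Vec, uint64_t Val, unsigned Width,
                           unsigned Offset) {
  uint64_t Mask = (~uint64_t(0) >> (64 - Width)) << Offset;
  return (Vec & ~Mask) | ((Val << Offset) & Mask);
}

int main() {
  uint64_t V = 0x0044003300220011ULL; // v4i16 <0x11,0x22,0x33,0x44>
  // Overwrite lane 1 (offset 16, width 16) with 0x55.
  return insertBits(V, 0x55, 16, 16) == 0x0044003300550011ULL ? 0 : 1;
}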
bool
@@ -1532,7 +2261,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getRegister(Hexagon::R30, getPointerTy()),
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
@@ -1545,43 +2274,54 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Should not custom lower this!");
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- // Frame & Return address. Currently unimplemented.
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Hexagon.");
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
-
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::SELECT: return Op;
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
-
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default:
+#ifndef NDEBUG
+ Op.getNode()->dumpr(&DAG);
+ if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
+ errs() << "Check for a non-legal type in this operation\n";
+#endif
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
}
}
-
-
-//===----------------------------------------------------------------------===//
-// Hexagon Scheduler Hooks
-//===----------------------------------------------------------------------===//
MachineBasicBlock *
HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB)
-const {
+ const {
switch (MI->getOpcode()) {
- case Hexagon::ADJDYNALLOC: {
+ case Hexagon::ALLOCA: {
MachineFunction *MF = BB->getParent();
- HexagonMachineFunctionInfo *FuncInfo =
- MF->getInfo<HexagonMachineFunctionInfo>();
+ auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
FuncInfo->addAllocaAdjustInst(MI);
return BB;
}
@@ -1593,10 +2333,10 @@ const {
// Inline Assembly Support
//===----------------------------------------------------------------------===//
-std::pair<unsigned, const TargetRegisterClass*>
-HexagonTargetLowering::getRegForInlineAsmConstraint(const
- std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+HexagonTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
@@ -1617,14 +2357,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return TM.getSubtarget<HexagonSubtarget>().hasV5TOps();
+ return Subtarget.hasV5TOps();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -1632,14 +2372,12 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
// Allows a sign-extended 11-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
return false;
- }
// No global is ever allowed as a base.
- if (AM.BaseGV) {
+ if (AM.BaseGV)
return false;
- }
int Scale = AM.Scale;
if (Scale < 0) Scale = -Scale;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index d03b1b8d9f4a..584c2c57c7ca 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -26,127 +26,145 @@ namespace llvm {
bool isPositiveHalfWord(SDNode *N);
namespace HexagonISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ enum NodeType : unsigned {
+ OP_BEGIN = ISD::BUILTIN_OP_END,
- CONST32,
+ CONST32 = OP_BEGIN,
CONST32_GP, // For marking data present in GP.
- CONST32_Int_Real,
FCONST32,
- SETCC,
- ADJDYNALLOC,
+ ALLOCA,
ARGEXTEND,
- CMPICC, // Compare two GPR operands, set icc.
- CMPFCC, // Compare two FP operands, set fcc.
- BRICC, // Branch to dest on icc condition
- BRFCC, // Branch to dest on fcc condition
- SELECT_ICC, // Select between two values using the current ICC flags.
- SELECT_FCC, // Select between two values using the current FCC flags.
+ PIC_ADD,
+ AT_GOT,
+ AT_PCREL,
- Hi, Lo, // Hi/Lo operations, typically on a global address.
+ CALLv3, // A V3+ call instruction.
+ CALLv3nr, // A V3+ call instruction that doesn't return.
+ CALLR,
- FTOI, // FP to Int within a FP register.
- ITOF, // Int to FP within a FP register.
-
- CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
- BR_JT, // Jump table.
- BARRIER, // Memory barrier
+ BR_JT, // Branch through jump table.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
+
POPCOUNT,
COMBINE,
- WrapperJT,
- WrapperCP,
- WrapperCombineII,
- WrapperCombineRR,
- WrapperCombineRI_V4,
- WrapperCombineIR_V4,
- WrapperPackhl,
- WrapperSplatB,
- WrapperSplatH,
- WrapperShuffEB,
- WrapperShuffEH,
- WrapperShuffOB,
- WrapperShuffOH,
+ PACKHL,
+ VSPLATB,
+ VSPLATH,
+ SHUFFEB,
+ SHUFFEH,
+ SHUFFOB,
+ SHUFFOH,
+ VSXTBH,
+ VSXTBW,
+ VSRAW,
+ VSRAH,
+ VSRLW,
+ VSRLH,
+ VSHLW,
+ VSHLH,
+ VCMPBEQ,
+ VCMPBGT,
+ VCMPBGTU,
+ VCMPHEQ,
+ VCMPHGT,
+ VCMPHGTU,
+ VCMPWEQ,
+ VCMPWGT,
+ VCMPWGTU,
+
+ INSERT,
+ INSERTRP,
+ EXTRACTU,
+ EXTRACTURP,
TC_RETURN,
EH_RETURN,
- DCFETCH
+ DCFETCH,
+
+ OP_END
};
}
+ class HexagonSubtarget;
+
class HexagonTargetLowering : public TargetLowering {
int VarArgsFrameOffset; // Frame offset to start of varargs area.
- bool CanReturnSmallStruct(const Function* CalleeFn,
- unsigned& RetSize) const;
+ bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
+ const;
+ void promoteLdStType(EVT VT, EVT PromotedLdStVT);
+ const HexagonTargetMachine &HTM;
+ const HexagonSubtarget &Subtarget;
public:
- const TargetMachine &TM;
- explicit HexagonTargetLowering(const TargetMachine &targetmachine);
+ explicit HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &ST);
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
- bool
- IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- const
- SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet,
+ bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
-
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SmallVectorImpl<SDValue> &OutVals,
- SDValue Callee) const;
-
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const;
+
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const override;
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
EVT getSetCCResultType(LLVMContext &C, EVT VT) const override {
if (!VT.isVector())
return MVT::i1;
@@ -159,10 +177,20 @@ bool isPositiveHalfWord(SDNode *N);
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
// Intrinsics
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 8373652c8f64..36a7e9f642c6 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -28,20 +28,12 @@ def TypeXTYPE : IType<8>;
def TypeENDLOOP: IType<31>;
// Maintain list of valid subtargets for each instruction.
-class SubTarget<bits<4> value> {
- bits<4> Value = value;
+class SubTarget<bits<6> value> {
+ bits<6> Value = value;
}
-def HasV2SubT : SubTarget<0xf>;
-def HasV2SubTOnly : SubTarget<0x1>;
-def NoV2SubT : SubTarget<0x0>;
-def HasV3SubT : SubTarget<0xe>;
-def HasV3SubTOnly : SubTarget<0x2>;
-def NoV3SubT : SubTarget<0x1>;
-def HasV4SubT : SubTarget<0xc>;
-def NoV4SubT : SubTarget<0x3>;
-def HasV5SubT : SubTarget<0x8>;
-def NoV5SubT : SubTarget<0x7>;
+def HasAnySubT : SubTarget<0x3f>; // 111111
+def HasV5SubT : SubTarget<0x3e>; // 111110
// Addressing modes for load/store instructions
class AddrModeType<bits<3> value> {
@@ -56,8 +48,8 @@ def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
def PostInc : AddrModeType<6>; // Post increment addressing mode
-class MemAccessSize<bits<3> value> {
- bits<3> Value = value;
+class MemAccessSize<bits<4> value> {
+ bits<4> Value = value;
}
def NoMemAccess : MemAccessSize<0>; // Not a memory access instruction.
@@ -84,7 +76,7 @@ class OpcodeHexagon {
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr, InstrItinClass itin, IType type>
- : Instruction, OpcodeHexagon {
+ : Instruction {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -92,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -157,11 +149,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<2> opExtentAlign = 0;
let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
- // If an instruction is valid on a subtarget (v2-v5), set the corresponding
- // bit from validSubTargets. v2 is the least significant bit.
+ // If an instruction is valid on a subtarget, set the corresponding
+ // bit from validSubTargets.
// By default, instruction is valid on all subtargets.
- SubTarget validSubTargets = HasV2SubT;
- let TSFlags{37-34} = validSubTargets.Value;
+ SubTarget validSubTargets = HasAnySubT;
+ let TSFlags{39-34} = validSubTargets.Value;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
@@ -169,7 +161,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{45-43} = accessSize.Value;
+ let TSFlags{46-43} = accessSize.Value;
bits<1> isTaken = 0;
let TSFlags {47} = isTaken; // Branch prediction.
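
Since these fields must stay in sync with MCTargetDesc/HexagonBaseInfo.h, the C++ side would decode the repositioned fields roughly as below (bit positions copied from the lets above; the function names are illustrative, not part of this patch):

    static unsigned getMemAccessSize(uint64_t TSFlags) {
      return (TSFlags >> 43) & 0xf;  // TSFlags{46-43}: accessSize, now 4 bits
    }
    static bool isTakenBranch(uint64_t TSFlags) {
      return (TSFlags >> 47) & 0x1;  // TSFlags{47}: branch-prediction bit
    }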
@@ -192,7 +184,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
- let isCodeGenOnly = 1;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
@@ -206,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
let mayLoad = 1 in
class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -226,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1 in
class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -234,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -243,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -256,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
+ OpcodeHexagon;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -270,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
@@ -283,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -299,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -313,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Definition of the instruction class NOT CHANGED.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr="">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>,
+ OpcodeHexagon;
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
@@ -366,7 +364,6 @@ class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
: ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
//
// ALU64 patterns.
//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 5fec80bb570a..7f7b2c96dba7 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,10 +17,88 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : IType<9>;
-def TypeNV : IType<10>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
+def TypeAG_VX : IType<28>;
+def TypeAG_VM : IType<29>;
+def TypePREFIX : IType<30>;
+
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+  bits<13> ISubHi = 0;  // High sub-insn
+  bits<13> ISubLo = 0;  // Low sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
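+
+// A self-contained sketch of the packing OpcodeDuplex describes (plain C++,
+// not the TableGen-generated encoder; Inst{15-14} staying 00 is what marks
+// the word as a duplex):
+//
+//   #include <cstdint>
+//   uint32_t packDuplex(uint8_t IClass, uint16_t ISubHi, uint16_t ISubLo) {
+//     uint32_t Inst = 0;
+//     Inst |= uint32_t((IClass >> 1) & 0x7) << 29; // Inst{31-29}=IClass{3-1}
+//     Inst |= uint32_t(IClass & 0x1) << 13;        // Inst{13}   =IClass{0}
+//     Inst |= uint32_t(ISubHi & 0x1fff) << 16;     // Inst{28-16}=high sub
+//     Inst |= uint32_t(ISubLo & 0x1fff);           // Inst{12-0} =low sub
+//     return Inst;                                 // Inst{15-14} stay 00
+//   }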
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // uses slot 0,1
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+  let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
//----------------------------------------------------------------------------//
// Instruction Classes Definitions
@@ -31,7 +109,7 @@ def TypePREFIX : IType<30>;
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
@@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>;
+ TypePREFIX>, OpcodeHexagon;
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 5d962590a705..49b4517698d5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
- : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
-
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(), Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -117,68 +115,172 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
+// Find the hardware loop instruction used to set-up the specified loop.
+// On Hexagon, we have two instructions used to set-up the hardware loop
+// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+// to indicate the end of a loop.
+static MachineInstr *
+findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+ int LOOPi;
+ int LOOPr;
+ if (EndLoopOp == Hexagon::ENDLOOP0) {
+ LOOPi = Hexagon::J2_loop0i;
+ LOOPr = Hexagon::J2_loop0r;
+  } else { // EndLoopOp == Hexagon::ENDLOOP1
+ LOOPi = Hexagon::J2_loop1i;
+ LOOPr = Hexagon::J2_loop1r;
+ }
-unsigned
-HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const{
-
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
-
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- int regPos = 0;
- // Check if ReverseBranchCondition has asked to reverse this branch
- // If we want to reverse the branch an odd number of times, we want
- // JMP_f.
- if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- BccOpc = Hexagon::J2_jumpf;
- regPos = 1;
+ // The loop set-up instruction will be in a predecessor block
+ for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
+ PE = BB->pred_end(); PB != PE; ++PB) {
+    // If this block has already been visited, skip it.
+ if (!Visited.insert(*PB).second)
+ continue;
+ if (*PB == BB)
+ continue;
+ for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
+ E = (*PB)->instr_rend(); I != E; ++I) {
+ int Opc = I->getOpcode();
+ if (Opc == LOOPi || Opc == LOOPr)
+ return &*I;
+      // We've reached a different loop, which means its LOOP instruction has
+      // been removed.
+ if (Opc == EndLoopOp)
+ return 0;
}
+ // Check the predecessors for the LOOP instruction.
+ MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
+ if (loop)
+ return loop;
+ }
+ return 0;
+}
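+
+// Usage, as it appears in InsertBranch below: locate the matching LOOPn
+// set-up, retarget it, then emit the ENDLOOPn.
+//
+//   SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+//   MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+//   assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+//   Loop->getOperand(0).setMBB(TBB); // point LOOPn at the new loop target
+//   BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);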
- if (!FBB) {
- if (Cond.empty()) {
- // Due to a bug in TailMerging/CFG Optimization, we need to add a
- // special case handling of a predicated jump followed by an
- // unconditional jump. If not, Tail Merging and CFG Optimization go
- // into an infinite loop.
- MachineBasicBlock *NewTBB, *NewFBB;
- SmallVector<MachineOperand, 4> Cond;
- MachineInstr *Term = MBB.getFirstTerminator();
- if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
- false)) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(&MBB));
- if (NewTBB == NextBB) {
- ReverseBranchCondition(Cond);
- RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, nullptr, Cond, DL);
- }
+unsigned HexagonInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+
+ Opcode_t BOpc = Hexagon::J2_jump;
+ Opcode_t BccOpc = Hexagon::J2_jumpt;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  // If ReverseBranchCondition has reversed this branch an odd number of
+  // times, Cond[0] already holds the inverted opcode (e.g. J2_jumpf), so
+  // take the conditional opcode directly from it.
+ if (!Cond.empty() && Cond[0].isImm())
+ BccOpc = Cond[0].getImm();
+
+ if (!FBB) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && isPredicated(Term) &&
+ !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+ MachineBasicBlock *NextBB =
+ std::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
}
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else {
- BuildMI(&MBB, DL,
- get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
}
- return 1;
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+      // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else if (isNewValueJump(Cond[0].getImm())) {
+ assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+ // New value jump
+ // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+ // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+ unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+ DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+ if (Cond[2].isReg()) {
+ unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+      } else if (Cond[2].isImm()) {
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addImm(Cond[2].getImm()).addMBB(TBB);
+ } else
+ llvm_unreachable("Invalid condition for branching");
+ } else {
+ assert((Cond.size() == 2) && "Malformed cond vector");
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
}
+ return 1;
+ }
+ assert((!Cond.empty()) &&
+ "Cond. cannot be empty when multiple branchings are required");
+ assert((!isNewValueJump(Cond[0].getImm())) &&
+ "NV-jump cannot be inserted with another branch");
+ // Special case for hardware loops. The condition is a basic block.
+ if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+    // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else {
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
-
- return 2;
+ return 2;
}
+/// This function can analyze one/two way branching only and should (mostly) be
+/// called by target independent side.
+/// The first entry is always the opcode of the branching instruction, except
+/// when the Cond vector is supposed to be empty, e.g., when AnalyzeBranch
+/// fails or the BB ends in a lone unconditional jump. Subsequent entries
+/// depend upon the opcode, e.g. a conditional jump "Jump_c p" will have
+/// Cond[0] = Jump_c
+/// Cond[1] = p
+/// HW-loop ENDLOOP:
+/// Cond[0] = ENDLOOP
+/// Cond[1] = MBB
+/// New value jump:
+/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
+/// Cond[1] = R
+/// Cond[2] = Imm
+/// @note The related function \fn findInstrPredicate fills in the Cond
+/// vector when a predicated instruction is passed to it; we follow the same
+/// protocol in that case too. A worked example of these Cond encodings
+/// follows AnalyzeBranch below.
+///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
TBB = nullptr;
FBB = nullptr;
+ Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -200,6 +302,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
do {
--I;
if (I->isEHLabel())
+ // Don't analyze EH branches.
return true;
} while (I != MBB.instr_begin());
@@ -211,9 +314,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
- // Delete the JMP if it's equivalent to a fall-through.
- if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
+
+ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+ I->getOperand(0).isMBB();
+ // Delete the J2_jump if it's equivalent to a fall-through.
+ if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
I->eraseFromParent();
@@ -243,9 +348,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
} while(I);
int LastOpcode = LastInst->getOpcode();
+ int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+ // If the branch target is not a basic block, it could be a tail call.
+ // (It is, if the target is a function.)
+ if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+ return true;
+ if (SecLastOpcode == Hexagon::J2_jump &&
+ !SecondLastInst->getOperand(0).isMBB())
+ return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
- bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
+ bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
// If there is only one terminator instruction, process it.
if (LastInst && !SecondLastInst) {
@@ -253,32 +366,50 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
- if (LastOpcode == Hexagon::ENDLOOP0) {
+ if (isEndLoopN(LastOpcode)) {
TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
if (LastOpcodeHasJMP_c) {
TBB = LastInst->getOperand(1).getMBB();
- if (LastOpcodeHasNot) {
- Cond.push_back(MachineOperand::CreateImm(0));
- }
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
+ // Only supporting rr/ri versions of new-value jumps.
+ if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+    DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+ << " with one jump\n";);
// Otherwise, don't know what this is.
return true;
}
- int SecLastOpcode = SecondLastInst->getOpcode();
-
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
- bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+ bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(1).getMBB();
- if (SecLastOpcodeHasNot)
- Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Only supporting rr/ri versions of new-value jumps.
+ if (SecLastOpcodeHasNVJump &&
+ (SecondLastInst->getNumExplicitOperands() == 3) &&
+ (LastOpcode == Hexagon::J2_jump)) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -293,48 +424,40 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
- // If the block ends with an ENDLOOP, and JMP, handle it.
- if (SecLastOpcode == Hexagon::ENDLOOP0 &&
- LastOpcode == Hexagon::J2_jump) {
+ // If the block ends with an ENDLOOP, and J2_jump, handle it.
+ if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+  DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+ << " with two jumps";);
// Otherwise, can't handle this.
return true;
}
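
The worked example promised in the comment before AnalyzeBranch, showing what Cond holds for each branch shape this function recognizes (register and block names are illustrative):

    // Conditional jump "if (p0) jump %bb.1":
    //   Cond[0] = imm(Hexagon::J2_jumpt), Cond[1] = reg(P0)
    // Hardware-loop end:
    //   Cond[0] = imm(Hexagon::ENDLOOP0), Cond[1] = target MBB
    // New-value jump (rr/ri forms only):
    //   Cond[0] = imm(<nvj opcode>), Cond[1] = reg, Cond[2] = reg or imm
    SmallVector<MachineOperand, 4> Cond;
    Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpt));
    Cond.push_back(MachineOperand::CreateReg(Hexagon::P0, /*isDef=*/false));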
-
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
- int BccOpcNot = Hexagon::J2_jumpf;
-
+ DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
- I->getOpcode() != BccOpcNot)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Only removing branches from end of MBB.
+ if (!I->isBranch())
+ return Count;
+ if (Count && (I->getOpcode() == Hexagon::J2_jump))
+ llvm_unreachable("Malformed basic block: unconditional branch not last");
+ MBB.erase(&MBB.back());
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
}
-
/// \brief For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -346,33 +469,39 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set mask and the first source register.
switch (Opc) {
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtup:
- case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
SrcReg = MI->getOperand(1).getReg();
Mask = ~0;
break;
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPbEQrr_sbsb_V4:
- case Hexagon::CMPbEQrr_ubub_V4:
- case Hexagon::CMPbGTUri_V4:
- case Hexagon::CMPbGTUrr_V4:
- case Hexagon::CMPbGTrr_V4:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFF;
break;
- case Hexagon::CMPhEQri_V4:
- case Hexagon::CMPhEQrr_shl_V4:
- case Hexagon::CMPhEQrr_xor_V4:
- case Hexagon::CMPhGTUri_V4:
- case Hexagon::CMPhGTUrr_V4:
- case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFFFF;
break;
@@ -380,30 +509,36 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set the value/second source register.
switch (Opc) {
- case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgt:
- case Hexagon::CMPbEQrr_sbsb_V4:
- case Hexagon::CMPbEQrr_ubub_V4:
- case Hexagon::CMPbGTUrr_V4:
- case Hexagon::CMPbGTrr_V4:
- case Hexagon::CMPhEQrr_shl_V4:
- case Hexagon::CMPhEQrr_xor_V4:
- case Hexagon::CMPhGTUrr_V4:
- case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
SrcReg2 = MI->getOperand(2).getReg();
return true;
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgti:
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPbGTUri_V4:
- case Hexagon::CMPhEQri_V4:
- case Hexagon::CMPhGTUri_V4:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
SrcReg2 = 0;
Value = MI->getOperand(2).getImm();
return true;
@@ -553,12 +688,101 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
llvm_unreachable("Unimplemented");
}
+bool
+HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ const HexagonRegisterInfo &TRI = getRegisterInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ case Hexagon::ALIGNA:
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
+ .addReg(TRI.getFrameRegister())
+ .addImm(-MI->getOperand(1).getImm());
+ MBB.erase(MI);
+ return true;
+ case Hexagon::TFR_PdTrue: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdFalse: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::VMULW: {
+ // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ return true;
+ }
+ case Hexagon::VMULW_ACC: {
+ // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src3Reg = MI->getOperand(3).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi).addReg(Src3SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo).addReg(Src3SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ MRI.clearKillFlags(Src3SubHi);
+ MRI.clearKillFlags(Src3SubLo);
+ return true;
+ }
+ case Hexagon::TCRETURNi:
+ MI->setDesc(get(Hexagon::J2_jump));
+ return true;
+ case Hexagon::TCRETURNr:
+ MI->setDesc(get(Hexagon::J2_jumpr));
+ return true;
+ }
+ return false;
+}
MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
// Hexagon_TODO: Implement.
return nullptr;
}
@@ -582,10 +806,6 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
const MCInstrDesc &MID = MI->getDesc();
const uint64_t F = MID.TSFlags;
if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
@@ -635,6 +855,16 @@ bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
return false;
}
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -649,7 +879,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
switch(Opc) {
case Hexagon::A2_tfrsi:
- return isInt<12>(MI->getOperand(1).getImm());
+    return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+           isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -700,7 +930,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return (isUInt<6>(MI->getOperand(1).getImm()) &&
isInt<6>(MI->getOperand(2).getImm()));
- case Hexagon::ADD_ri:
+ case Hexagon::A2_addi:
return isInt<8>(MI->getOperand(2).getImm());
case Hexagon::A2_aslh:
@@ -709,7 +939,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::A2_sxth:
case Hexagon::A2_zxtb:
case Hexagon::A2_zxth:
- return Subtarget.hasV4TOps();
+ return true;
}
return true;
@@ -755,8 +985,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
}
-int HexagonInstrInfo::
-getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
Hexagon::PredSense_true;
@@ -774,14 +1003,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
return !invertPredicate ? Hexagon::C2_ccombinewt :
Hexagon::C2_ccombinewf;
- // Word.
- case Hexagon::STriw_f:
- return !invertPredicate ? Hexagon::S2_pstorerit_io:
- Hexagon::S2_pstorerif_io;
- case Hexagon::STriw_indexed_f:
- return !invertPredicate ? Hexagon::S2_pstorerit_io:
- Hexagon::S2_pstorerif_io;
-
// DEALLOC_RETURN.
case Hexagon::L4_return:
return !invertPredicate ? Hexagon::L4_return_t:
@@ -794,148 +1015,51 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
bool HexagonInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
+ DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+ return false;
+ }
int Opc = MI->getOpcode();
assert (isPredicable(MI) && "Expected predicable instruction");
- bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
- (Cond[0].getImm() == 0));
-
- // This will change MI's opcode to its predicate version.
- // However, its operand list is still the old one, i.e. the
- // non-predicate one.
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
-
- int oper = -1;
- unsigned int GAIdx = 0;
-
- // Indicates whether the current MI has a GlobalAddress operand
- bool hasGAOpnd = false;
- std::vector<MachineOperand> tmpOpnds;
-
- // Indicates whether we need to shift operands to right.
- bool needShift = true;
-
- // The predicate is ALWAYS the FIRST input operand !!!
- if (MI->getNumOperands() == 0) {
- // The non-predicate version of MI does not take any operands,
- // i.e. no outs and no ins. In this condition, the predicate
- // operand will be directly placed at Operands[0]. No operand
- // shift is needed.
- // Example: BARRIER
- needShift = false;
- oper = -1;
- }
- else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
- && MI->getOperand(MI->getNumOperands()-1).isDef()
- && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
- // The non-predicate version of MI does not have any input operands.
- // In this condition, we extend the length of Operands[] by one and
- // copy the original last operand to the newly allocated slot.
- // At this moment, it is just a place holder. Later, we will put
- // predicate operand directly into it. No operand shift is needed.
- // Example: r0=BARRIER (this is a faked insn used here for illustration)
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- needShift = false;
- oper = MI->getNumOperands() - 2;
- }
- else {
- // We need to right shift all input operands by one. Duplicate the
- // last operand into the newly allocated slot.
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- }
-
- if (needShift)
- {
- // Operands[ MI->getNumOperands() - 2 ] has been copied into
- // Operands[ MI->getNumOperands() - 1 ], so we start from
- // Operands[ MI->getNumOperands() - 3 ].
- // oper is a signed int.
- // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
- for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
- {
- MachineOperand &MO = MI->getOperand(oper);
-
- // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7]
- // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
- // /\~
- // /||\~
- // ||
- // Predicate Operand here
- if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
- break;
- }
- if (MO.isReg()) {
- MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
- MO.isImplicit(), MO.isKill(),
- MO.isDead(), MO.isUndef(),
- MO.isDebug());
- }
- else if (MO.isImm()) {
- MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
- }
- else if (MO.isGlobal()) {
- // MI can not have more than one GlobalAddress operand.
- assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
-
- // There is no member function called "ChangeToGlobalAddress" in the
- // MachineOperand class (not like "ChangeToRegister" and
- // "ChangeToImmediate"). So we have to remove them from Operands[] list
- // first, and then add them back after we have inserted the predicate
- // operand. tmpOpnds[] is to remember these operands before we remove
- // them.
- tmpOpnds.push_back(MO);
-
- // Operands[oper] is a GlobalAddress operand;
- // Operands[oper+1] has been copied into Operands[oper+2];
- hasGAOpnd = true;
- GAIdx = oper;
- continue;
- }
- else {
- llvm_unreachable("Unexpected operand type");
- }
- }
+ bool invertJump = predOpcodeHasNot(Cond);
+
+ // We have to predicate MI "in place", i.e. after this function returns,
+ // MI will need to be transformed into a predicated form. To avoid com-
+ // plicated manipulations with the operands (handling tied operands,
+ // etc.), build a new temporary instruction, then overwrite MI with it.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned PredOpc = getCondOpcode(Opc, invertJump);
+ MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+ unsigned NOp = 0, NumOps = MI->getNumOperands();
+ while (NOp < NumOps) {
+ MachineOperand &Op = MI->getOperand(NOp);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ break;
+ T.addOperand(Op);
+ NOp++;
}
- int regPos = invertJump ? 1 : 0;
- MachineOperand PredMO = Cond[regPos];
+ unsigned PredReg, PredRegPos, PredRegFlags;
+ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+ (void)GotPredReg;
+ assert(GotPredReg);
+ T.addReg(PredReg, PredRegFlags);
+ while (NOp < NumOps)
+ T.addOperand(MI->getOperand(NOp++));
- // [oper] now points to the last explicit Def. Predicate operand must be
- // located at [oper+1]. See diagram above.
- // This assumes that the predicate is always the first operand,
- // i.e. Operands[0+numResults], in the set of inputs
- // It is better to have an assert here to check this. But I don't know how
- // to write this assert because findFirstPredOperandIdx() would return -1
- if (oper < -1) oper = -1;
+ MI->setDesc(get(PredOpc));
+ while (unsigned n = MI->getNumOperands())
+ MI->RemoveOperand(n-1);
+ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+ MI->addOperand(T->getOperand(i));
- MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
- PredMO.isImplicit(), false,
- PredMO.isDead(), PredMO.isUndef(),
- PredMO.isDebug());
+ MachineBasicBlock::instr_iterator TI = &*T;
+ B.erase(TI);
- MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
- RegInfo.clearKillFlags(PredMO.getReg());
-
- if (hasGAOpnd)
- {
- unsigned int i;
-
- // Operands[GAIdx] is the original GlobalAddress operand, which is
- // already copied into tmpOpnds[0].
- // Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
- // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
- // so we start from [GAIdx+2]
- for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
- tmpOpnds.push_back(MI->getOperand(i));
-
- // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
- // It is very important that we always remove from the end of Operands[]
- // MI->getNumOperands() is at least 2 if program goes to here.
- for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
- MI->RemoveOperand(i);
-
- for (i = 0; i < tmpOpnds.size(); ++i)
- MI->addOperand(tmpOpnds[i]);
- }
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+ MRI.clearKillFlags(PredReg);
return true;
}
@@ -1014,12 +1138,10 @@ bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
// Returns true, if a ST insn can be promoted to a new-value store.
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::mayNVStorePos) &
- HexagonII::mayNVStoreMask &
- QRI.Subtarget.hasV4TOps());
+ HexagonII::mayNVStoreMask);
}
bool
@@ -1050,15 +1172,20 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
//
// We indicate that we want to reverse the branch by
-// inserting a 0 at the beginning of the Cond vector.
+// inserting the reversed branching opcode.
//
-bool HexagonInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- Cond.erase(Cond.begin());
- } else {
- Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
- }
+bool HexagonInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return true;
+ assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+ Opcode_t opcode = Cond[0].getImm();
+ assert(get(opcode).isBranch() && "Should be a branching condition.");
+ if (isEndLoopN(opcode))
+ return true;
+ Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
+ Cond[0].setImm(NewOpcode);
return false;
}
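
Concretely, a sketch of the new round-trip (ENDLOOP conditions are the one case that cannot be reversed):

    // Given Cond = { imm(J2_jumpt), reg(P0) }:
    //   ReverseBranchCondition(Cond) -> false, Cond[0] is now J2_jumpf
    // Given Cond = { imm(ENDLOOP0), MBB }:
    //   ReverseBranchCondition(Cond) -> true  (reversal not possible)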
@@ -1084,10 +1211,10 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::
-isValidOffset(const int Opcode, const int Offset) const {
+bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
+ bool Extend) const {
// This function is to check whether the "Offset" is in the correct range of
- // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
+ // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
// We used to assert if the offset was not properly aligned, however,
@@ -1095,19 +1222,23 @@ isValidOffset(const int Opcode, const int Offset) const {
// problem, and we need to allow for it. The front end warns of such
// misaligns with respect to load size.
- switch(Opcode) {
+ switch (Opcode) {
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop1i:
+ return isUInt<10>(Offset);
+ }
+
+ if (Extend)
+ return true;
+ switch (Opcode) {
case Hexagon::L2_loadri_io:
- case Hexagon::LDriw_f:
case Hexagon::S2_storeri_io:
- case Hexagon::STriw_f:
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::L2_loadrd_io:
- case Hexagon::LDrid_f:
case Hexagon::S2_storerd_io:
- case Hexagon::STrid_f:
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
@@ -1123,8 +1254,7 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
- case Hexagon::ADD_ri:
- case Hexagon::TFR_FI:
+ case Hexagon::A2_addi:
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
@@ -1158,10 +1288,8 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
- case Hexagon::J2_loop0i:
- return isUInt<10>(Offset);
-
- // INLINEASM is very special.
+ case Hexagon::TFR_FI:
+ case Hexagon::TFR_FIA:
case Hexagon::INLINEASM:
return true;
}
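
When an offset fails these range checks, the fix-up described at the top of the function applies; schematically (illustrative registers, not code emitted by this function itself):

    //   r0 = memw(r29 + #40000)   // offset out of range for L2_loadri_io
    // becomes:
    //   r28 = add(r29, ##40000)   // A2_addi, constant-extended
    //   r0  = memw(r28 + #0)      // offset now trivially valid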
@@ -1324,8 +1452,8 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
case Hexagon::A4_pzxthfnew:
case Hexagon::A4_pzxtht:
case Hexagon::A4_pzxthtnew:
- case Hexagon::ADD_ri_cPt:
- case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::A2_paddit:
+ case Hexagon::A2_paddif:
case Hexagon::C2_ccombinewt:
case Hexagon::C2_ccombinewf:
return true;
@@ -1334,7 +1462,6 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
bool HexagonInstrInfo::
isConditionalLoad (const MachineInstr* MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
default: return false;
@@ -1350,7 +1477,6 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L2_ploadruhf_io:
case Hexagon::L2_ploadrubt_io:
case Hexagon::L2_ploadrubf_io:
- return true;
case Hexagon::L2_ploadrdt_pi:
case Hexagon::L2_ploadrdf_pi:
case Hexagon::L2_ploadrit_pi:
@@ -1363,7 +1489,6 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L2_ploadruhf_pi:
case Hexagon::L2_ploadrubt_pi:
case Hexagon::L2_ploadrubf_pi:
- return QRI.Subtarget.hasV4TOps();
case Hexagon::L4_ploadrdt_rr:
case Hexagon::L4_ploadrdf_rr:
case Hexagon::L4_ploadrbt_rr:
@@ -1376,7 +1501,7 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L4_ploadruhf_rr:
case Hexagon::L4_ploadrit_rr:
case Hexagon::L4_ploadrif_rr:
- return QRI.Subtarget.hasV4TOps();
+ return true;
}
}
@@ -1416,7 +1541,6 @@ isConditionalLoad (const MachineInstr* MI) const {
// is not valid for new-value stores.
bool HexagonInstrInfo::
isConditionalStore (const MachineInstr* MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
default: return false;
@@ -1450,7 +1574,6 @@ isConditionalStore (const MachineInstr* MI) const {
case Hexagon::S4_pstorerif_rr:
case Hexagon::S2_pstorerit_pi:
case Hexagon::S2_pstorerif_pi:
- return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
case Hexagon::S4_pstorerdt_abs:
@@ -1461,7 +1584,7 @@ isConditionalStore (const MachineInstr* MI) const {
case Hexagon::S4_pstorerhf_abs:
case Hexagon::S4_pstorerit_abs:
case Hexagon::S4_pstorerif_abs:
- return QRI.Subtarget.hasV4TOps();
+ return true;
// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
// from the "Conditional Store" list. Because a predicated new value store
@@ -1500,13 +1623,12 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
return false;
}
-bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
- return (getAddrMode(MI) == HexagonII::PostInc);
+bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
+ return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
}
-bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ return (getAddrMode(MI) == HexagonII::PostInc);
}
// Returns true, if any one of the operands is a dot new
@@ -1548,22 +1670,29 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
- // store new value byte
- case Hexagon::STrib_shl_V4:
- return Hexagon::STrib_shl_nv_V4;
+ case Hexagon::S4_storerb_ur:
+ return Hexagon::S4_storerbnew_ur;
- case Hexagon::STrih_shl_V4:
- return Hexagon::STrih_shl_nv_V4;
+ case Hexagon::S4_storerh_ur:
+ return Hexagon::S4_storerhnew_ur;
- case Hexagon::STriw_f:
- return Hexagon::S2_storerinew_io;
+ case Hexagon::S4_storeri_ur:
+ return Hexagon::S4_storerinew_ur;
- case Hexagon::STriw_indexed_f:
- return Hexagon::S4_storerinew_rr;
+ case Hexagon::S2_storerb_pci:
+ return Hexagon::S2_storerb_pci;
- case Hexagon::STriw_shl_V4:
- return Hexagon::STriw_shl_nv_V4;
+ case Hexagon::S2_storeri_pci:
+ return Hexagon::S2_storeri_pci;
+ case Hexagon::S2_storerh_pci:
+ return Hexagon::S2_storerh_pci;
+
+ case Hexagon::S2_storerd_pci:
+ return Hexagon::S2_storerd_pci;
+
+ case Hexagon::S2_storerf_pci:
+ return Hexagon::S2_storerf_pci;
}
return 0;
}
@@ -1652,19 +1781,14 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
-bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
-
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
+bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
if (isExtended) // Instruction must be extended.
return true;
- unsigned isExtendable = (F >> HexagonII::ExtendablePos)
- & HexagonII::ExtendableMask;
+ unsigned isExtendable =
+ (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
if (!isExtendable)
return false;
@@ -1685,7 +1809,8 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol())
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
+ MO.isJTI() || MO.isCPI())
return true;
// If the extendable operand is not 'Immediate' type, the instruction should
@@ -1699,6 +1824,27 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
return (ImmValue < MinValue || ImmValue > MaxValue);
}
+// Return the number of bytes required to encode the instruction.
+// Hexagon instructions are fixed length, 4 bytes, unless they
+// use a constant extender, which requires another 4 bytes.
+// For debug instructions and prolog labels, return 0.
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
+
+ if (MI->isDebugValue() || MI->isPosition())
+ return 0;
+
+ unsigned Size = MI->getDesc().getSize();
+ if (!Size)
+ // Assume the default insn size in case it cannot be determined
+ // for whatever reason.
+ Size = HEXAGON_INSTR_SIZE;
+
+ if (isConstExtended(MI) || isExtended(MI))
+ Size += HEXAGON_INSTR_SIZE;
+
+ return Size;
+}
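+
+// For instance, using the rules just stated:
+//   r0 = add(r1, #5)        -> 4 bytes (HEXAGON_INSTR_SIZE)
+//   r0 = add(r1, ##70000)   -> 8 bytes (insn + immext word)
+//   DBG_VALUE / CFI label   -> 0 bytes (no encoding)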
+
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.
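The hunk below elides the body; in rough sketch the selection is as follows (the taken-hint opcode names are an assumption, inferred from the J2_jumpfnew/J2_jumpfnewpt pair visible elsewhere in this patch):

    // Not the actual implementation -- a sketch of the described selection,
    // assuming Taken came from MachineBranchProbabilityInfo.
    int Op = InvertedSense
        ? (Taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew)
        : (Taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew);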
@@ -1730,10 +1876,6 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
// Returns true if a particular operand is extendable for an instruction.
bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
unsigned short OperandNum) const {
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
@@ -1841,8 +1983,36 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
(Opcode == Hexagon::J2_jumpf);
}
-bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
- return (Opcode == Hexagon::J2_jumpf) ||
- (Opcode == Hexagon::J2_jumpfnewpt) ||
- (Opcode == Hexagon::J2_jumpfnew);
+bool HexagonInstrInfo::predOpcodeHasNot(
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty() || !isPredicated(Cond[0].getImm()))
+ return false;
+ return !isPredicatedTrue(Cond[0].getImm());
+}
+
+bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
+ return (Opcode == Hexagon::ENDLOOP0 ||
+ Opcode == Hexagon::ENDLOOP1);
}
+
+bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &PredReg, unsigned &PredRegPos,
+ unsigned &PredRegFlags) const {
+ if (Cond.empty())
+ return false;
+ assert(Cond.size() == 2);
+ if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
+ DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ return false;
+ }
+ PredReg = Cond[1].getReg();
+ PredRegPos = 1;
+  // See IfConversion.cpp for why we add RegState::Implicit | RegState::Undef.
+ PredRegFlags = 0;
+ if (Cond[1].isImplicit())
+ PredRegFlags = RegState::Implicit;
+ if (Cond[1].isUndef())
+ PredRegFlags |= RegState::Undef;
+ return true;
+}
+
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 6acfbec24709..0239cabe9e52 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,3 +1,4 @@
+
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
@@ -26,14 +27,15 @@
namespace llvm {
struct EVT;
-
+class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget;
- typedef unsigned Opcode_t;
public:
+ typedef unsigned Opcode_t;
+
explicit HexagonInstrInfo(HexagonSubtarget &ST);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
@@ -102,15 +104,21 @@ public:
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// that remain after register allocation. Many pseudo instructions are
+ /// created to help register allocation. This is the place to convert them
+ /// into real instructions. The target can edit MI in place, or it can insert
+ /// new instructions and erase MI. The function should return true if
+ /// anything was changed.
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override {
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override {
return nullptr;
}
@@ -154,7 +162,7 @@ public:
bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
- bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
bool isValidAutoIncImm(const EVT VT, const int Offset) const;
bool isMemOp(const MachineInstr *MI) const;
bool isSpillPredRegOp(const MachineInstr *MI) const;
@@ -178,6 +186,7 @@ public:
bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const;
bool isNewValue(const MachineInstr* MI) const;
+ bool isNewValue(Opcode_t Opcode) const;
bool isDotNewInst(const MachineInstr* MI) const;
int GetDotOldOp(const int opc) const;
int GetDotNewOp(const MachineInstr* MI) const;
@@ -193,11 +202,13 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const;
bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJump(Opcode_t Opcode) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
void immediateExtend(MachineInstr *MI) const;
- bool isConstExtended(MachineInstr *MI) const;
+ bool isConstExtended(const MachineInstr *MI) const;
+ unsigned getSize(const MachineInstr *MI) const;
int getDotNewPredJumpOp(MachineInstr *MI,
const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;
@@ -209,10 +220,12 @@ public:
bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
- bool PredOpcodeHasNot(Opcode_t Opcode) const;
-
-private:
- int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+ bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isEndLoopN(Opcode_t Opcode) const;
+ bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &PredReg, unsigned &PredRegPos,
+ unsigned &PredRegFlags) const;
+ int getCondOpcode(int Opc, bool sense) const;
};
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 7ce65f345cab..3b32c10ed5b0 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -29,8 +29,36 @@ def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
// 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs),
(EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
+def HiReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
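+
+LoReg and HiReg simply select the low and high 32-bit halves of a 64-bit register pair; the arithmetic equivalent, shown only for clarity:
+
+#include <cstdint>
+
+// Arithmetic equivalent of LoReg/HiReg on a 64-bit register pair.
+static inline uint32_t loReg(uint64_t Rss) { return (uint32_t)Rss; }
+static inline uint32_t hiReg(uint64_t Rss) { return (uint32_t)(Rss >> 32); }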
-//===----------------------------------------------------------------------===//
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the signed immediate decremented by 1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM1Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-2.
+def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the signed immediate decremented by 2 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM2Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-3.
+def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the signed immediate decremented by 3 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM3Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+ // Return the unsigned immediate decremented by 1 as an SDNode.
+ uint32_t imm = N->getZExtValue();
+ return XformUToUM1Imm(imm, SDLoc(N));
+}]>;
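
The Xform helpers referenced here live in HexagonISelDAGToDAG.cpp; each presumably wraps getTargetConstant() on the adjusted value, along these lines (an assumed sketch, not the verified implementation):

// Assumed shape of the helper (sketch only; see HexagonISelDAGToDAG.cpp):
inline SDValue HexagonDAGToDAGISel::XformSToSM1Imm(int32_t Imm,
                                                   const SDLoc &dl) {
  // Decrement a signed immediate by one and wrap it as a target constant.
  return CurDAG->getTargetConstant(Imm - 1, dl, MVT::i32);
}
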
//===----------------------------------------------------------------------===//
// Compare
@@ -76,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
+// Add.
+
+def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
@@ -140,12 +174,10 @@ class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
}
-let isCodeGenOnly = 0 in {
def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
-}
class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
bits<3> MinOp, bit OpsRev, bit IsComm>
@@ -153,12 +185,24 @@ class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
}
-let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123,
- isCodeGenOnly = 0 in {
+def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>;
+def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>;
+
+let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in {
+ def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>;
def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
+ def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>;
+ def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>;
def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
+ def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>;
}
+let Itinerary = ALU32_3op_tc_2_SLOT0123 in
+def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>;
+
+def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>;
+def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>;
+
multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit OpsRev> {
def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
@@ -174,13 +218,11 @@ multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
}
-let isCodeGenOnly = 0 in {
defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-}
// Pats for instruction selection.
class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
@@ -194,8 +236,7 @@ def: BinOp32_pat<sub, A2_sub, i32>;
def: BinOp32_pat<xor, A2_xor, i32>;
// A few special cases producing register pairs:
-let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
- isCodeGenOnly = 0 in {
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
let isPredicable = 1 in
@@ -208,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
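
For reference, HexagonISD::COMBINE pairs two 32-bit values into a 64-bit register with the first operand in the high word; a scalar model (illustration only):

#include <cstdint>

// Scalar model of A2_combinew: Rdd = combine(Rs, Rt), Rs in the high word.
static inline uint64_t combinew(uint32_t Rs, uint32_t Rt) {
  return ((uint64_t)Rs << 32) | Rt;
}
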
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -229,7 +273,7 @@ class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
let Inst{1-0} = Pd;
}
-let Itinerary = ALU32_3op_tc_2early_SLOT0123, isCodeGenOnly = 0 in {
+let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
@@ -252,8 +296,7 @@ def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
-let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1,
- isCodeGenOnly = 0 in
+let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
@@ -283,11 +326,11 @@ def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
- AddedComplexity = 75, isCodeGenOnly = 0 in
+ AddedComplexity = 75 in
def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
[(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -303,7 +346,7 @@ def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
//===----------------------------------------------------------------------===//
// Template class for predicated ADD of a reg and an Immediate value.
//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
+let hasNewValue = 1, hasSideEffects = 0 in
class T_Addri_Pred <bit PredNot, bit PredNew>
: ALU32_ri <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
@@ -329,13 +372,11 @@ class T_Addri_Pred <bit PredNot, bit PredNew>
//===----------------------------------------------------------------------===//
// A2_addi: Add a signed immediate to a register.
//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
-class T_Addri <Operand immOp, list<dag> pattern = [] >
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_Addri <Operand immOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, immOp:$s16),
- "$Rd = add($Rs, #$s16)", pattern,
- //[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), (s16ExtPred:$s16)))],
- "", ALU32_ADDI_tc_1_SLOT0123> {
+ "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> {
bits<5> Rd;
bits<5> Rs;
bits<16> s16;
@@ -353,31 +394,29 @@ class T_Addri <Operand immOp, list<dag> pattern = [] >
//===----------------------------------------------------------------------===//
multiclass Addri_Pred<string mnemonic, bit PredNot> {
let isPredicatedFalse = PredNot in {
- def _c#NAME : T_Addri_Pred<PredNot, 0>;
+ def NAME : T_Addri_Pred<PredNot, 0>;
// Predicate new
- def _cdn#NAME : T_Addri_Pred<PredNot, 1>;
+ def NAME#new : T_Addri_Pred<PredNot, 1>;
}
}
-let isExtendable = 1, InputType = "imm" in
+let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
multiclass Addri_base<string mnemonic, SDNode OpNode> {
let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
- let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
- isPredicable = 1 in
- def NAME : T_Addri< s16Ext, // Rd=add(Rs,#s16)
- [(set (i32 IntRegs:$Rd),
- (add IntRegs:$Rs, s16ExtPred:$s16))]>;
-
- let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
- hasSideEffects = 0, isPredicated = 1 in {
- defm Pt : Addri_Pred<mnemonic, 0>;
- defm NotPt : Addri_Pred<mnemonic, 1>;
+ let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in
+ def A2_#NAME : T_Addri<s16Ext>;
+
+ let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
+ defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
+ defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>;
}
}
}
-let isCodeGenOnly = 0 in
-defm ADD_ri : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+
+def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
+ (i32 (A2_addi I32:$Rs, imm:$s16))>;
//===----------------------------------------------------------------------===//
// Template class used for the following ALU32 instructions.
@@ -390,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s10Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> {
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -406,19 +445,15 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
-def OR_ri : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
-def AND_ri : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
-}
+def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
+def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
// Subtract register from immediate
// Rd32=sub(#s10,Rs32)
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
-CextOpcode = "sub", InputType = "imm", hasNewValue = 1, isCodeGenOnly = 0 in
-def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
- "$Rd = sub(#$s10, $Rs)" ,
- [(set IntRegs:$Rd, (sub s10ExtPred:$s10, IntRegs:$Rs))] > ,
- ImmRegRel {
+let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
+def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
+ "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
bits<5> Rd;
bits<10> s10;
bits<5> Rs;
@@ -433,14 +468,18 @@ def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
}
// Nop.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
let IClass = 0b0111;
let Inst{27-24} = 0b1111;
}
+
+def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+
// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
-def : Pat<(not (i32 IntRegs:$src1)),
- (SUB_ri -1, (i32 IntRegs:$src1))>;
+def: Pat<(not (i32 IntRegs:$src1)),
+ (A2_subri -1, IntRegs:$src1)>;
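+
+The mapping is justified by the two's-complement identity ~x == -1 - x; a quick self-contained check:
+
+#include <cassert>
+#include <cstdint>
+
+// Identity behind the mapping above: ~x == -1 - x in two's complement.
+int main() {
+  for (int32_t x : {INT32_MIN, -1, 0, 1, 42, INT32_MAX})
+    assert(~x == -1 - x);
+}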
let hasSideEffects = 0, hasNewValue = 1 in
class T_tfr16<bit isHi>
@@ -459,10 +498,8 @@ class T_tfr16<bit isHi>
let Inst{13-0} = u16{13-0};
}
-let isCodeGenOnly = 0 in {
def A2_tfril: T_tfr16<0>;
def A2_tfrih: T_tfr16<1>;
-}
// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
@@ -575,20 +612,17 @@ class T_TFRI_Pred<bit PredNot, bit PredNew>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def C2_cmoveit : T_TFRI_Pred<0, 0>;
def C2_cmoveif : T_TFRI_Pred<1, 0>;
def C2_cmovenewit : T_TFRI_Pred<0, 1>;
def C2_cmovenewif : T_TFRI_Pred<1, 1>;
-}
let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
- isPredicated = 0, isPredicable = 1, isReMaterializable = 1,
- isCodeGenOnly = 0 in
+ isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -599,20 +633,26 @@ def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in
defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
+let isAsmParserOnly = 1 in
defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
// Assembler mapped
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isAsmParserOnly = 1 in
def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
"$dst = #$src1",
[(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
// TODO: see if this instruction can be deleted..
-let isExtendable = 1, opExtendable = 1, opExtentBits = 6 in
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
"$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, s8Imm:$src2),
+ "$dst = combine(##$src1, #$src2)">;
+}
//===----------------------------------------------------------------------===//
// ALU32/ALU -
@@ -642,28 +682,28 @@ class T_MUX1 <bit MajOp, dag ins, string AsmStr>
let Inst{4-0} = Rd;
}
-let opExtendable = 2, isCodeGenOnly = 0 in
+let opExtendable = 2 in
def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
"$Rd = mux($Pu, #$s8, $Rs)">;
-let opExtendable = 3, isCodeGenOnly = 0 in
+let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
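+
+These select-to-mux patterns rely on mux being a plain ternary choice; a scalar model (illustration only):
+
+#include <cstdint>
+
+// Scalar model of C2_mux: Rd = mux(Pu, Rs, Rt) is simply Pu ? Rs : Rt.
+static inline int32_t mux(bool Pu, int32_t Rs, int32_t Rt) {
+  return Pu ? Rs : Rt;
+}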
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+ opExtentBits = 8, opExtendable = 2 in
def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
[(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+ (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -679,14 +719,20 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+
//===----------------------------------------------------------------------===//
// template class for non-predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxth
//===----------------------------------------------------------------------===//
let hasNewValue = 1, opNewValue = 0 in
class T_ALU32_2op <string mnemonic, bits<3> minOp> :
- ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)", [] > {
+ ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)", [] > {
bits<5> Rd;
bits<5> Rs;
@@ -703,13 +749,12 @@ class T_ALU32_2op <string mnemonic, bits<3> minOp> :
// template class for predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxtb, zxth
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, validSubTargets = HasV4SubT,
- hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
- bit isPredNew > :
- ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
- !if(isPredNot, "if (!$Pu", "if ($Pu")
- #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
+ bit isPredNew > :
+ ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
+ !if(isPredNot, "if (!$Pu", "if ($Pu")
+ #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
bits<5> Rd;
bits<2> Pu;
bits<5> Rs;
@@ -741,20 +786,18 @@ multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
let isPredicable = 1, hasSideEffects = 0 in
def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
- let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ let isPredicated = 1, hasSideEffects = 0 in {
defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
}
}
}
-let isCodeGenOnly = 0 in {
defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
-}
// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
// Compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has
@@ -784,14 +827,13 @@ multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
let isPredicable = 1, hasSideEffects = 0 in
def A2_#NAME : T_ZXTB;
- let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ let isPredicated = 1, hasSideEffects = 0 in {
defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
}
}
}
-let isCodeGenOnly=0 in
defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
@@ -799,44 +841,182 @@ def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
-// Mux.
-def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "$dst = vmux($src1, $src2, $src3)",
- []>;
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+ bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+ : ALU64_rr < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+ #!if(isCrnd,":crnd","")
+ #!if(isSat, ":sat", ""),
+ [], "", ALU64_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b0011;
+ let Inst{23-21} = majOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{7-5} = minOp;
+ let Inst{4-0} = Rdd;
+ }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+ def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>;
+ def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+ def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>;
+ def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+ def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>;
+}
+
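+
+All T_VectALU_64 instances operate lane-wise on the 64-bit pair; for example, A2_vaddub adds eight unsigned byte lanes independently and the :sat variant clamps instead of wrapping. A scalar model, assuming the usual Hexagon lane semantics:
+
+#include <cstdint>
+
+// Scalar model of A2_vaddub / A2_vaddubs (sketch): eight independent
+// unsigned byte lanes; the :sat form clamps at 255 instead of wrapping.
+static uint64_t vaddub(uint64_t Rss, uint64_t Rtt, bool Sat) {
+  uint64_t Rdd = 0;
+  for (int i = 0; i < 8; ++i) {
+    unsigned S = (Rss >> (8 * i)) & 0xff;
+    unsigned T = (Rtt >> (8 * i)) & 0xff;
+    unsigned R = S + T;
+    if (Sat && R > 255) R = 255;
+    Rdd |= (uint64_t)(R & 0xff) << (8 * i);
+  }
+  return Rdd;
+}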
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
+ def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>;
+ def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
+ def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>;
+ def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
+}
+
+// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
+def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
+def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>;
+def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>;
+def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
+
+def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>;
+def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>;
+def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
+
+// Rdd=vnavg[bh](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
+ def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
+}
+
+// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat
+let Defs = [USR_OVF] in {
+ def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>;
+ def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>;
+ def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>;
+ def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>;
+}
+
+// Rdd=vsub[u][bh](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>;
+ def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>;
+ def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>;
+}
+
+// Rdd=vsub[u][bh](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>;
+ def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>;
+ def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>;
+ def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>;
+}
+
+// Rdd=vmax[u][bhw](Rss,Rtt)
+def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>;
+def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>;
+def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>;
+def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>;
+def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>;
+def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>;
+
+// Rdd=vmin[u][bhw](Rss,Rtt)
+def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>;
+def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>;
+def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>;
+def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>;
+def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>;
+def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>;
//===----------------------------------------------------------------------===//
-// ALU32/PERM -
+// Template class for vector compare
//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_vcmp <string Str, bits<4> minOp>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = "#Str#"($Rss, $Rtt)", [],
+ "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = minOp{3};
+ let Inst{7-5} = minOp{2-0};
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+ : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+ (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+// Vector compare bytes
+def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
+def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
+
+// Vector compare halfwords
+def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>;
+def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>;
+def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>;
+
+// Vector compare words
+def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
+def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
+def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
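+
+Each vector compare collapses the lane results into the 8-bit predicate register; for the byte form each lane presumably owns one predicate bit (the halfword and word forms would replicate each result across 2 and 4 bits). A byte-lane sketch under that assumption:
+
+#include <cstdint>
+
+// Scalar model of A2_vcmpbeq (sketch): one predicate bit per byte lane.
+static uint8_t vcmpb_eq(uint64_t Rss, uint64_t Rtt) {
+  uint8_t Pd = 0;
+  for (int i = 0; i < 8; ++i)
+    if (((Rss >> (8 * i)) & 0xff) == ((Rtt >> (8 * i)) & 0xff))
+      Pd |= 1u << i;
+  return Pd;
+}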
//===----------------------------------------------------------------------===//
-// ALU32/PRED +
+// ALU32/PERM -
//===----------------------------------------------------------------------===//
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM1Imm(imm);
-}]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- uint32_t imm = N->getZExtValue();
- return XformUToUM1Imm(imm);
-}]>;
-def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = cl0($src1)",
- [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+// No bits needed. If cmp.ge is found, the assembler parser will
+// transform it to cmp.gt, subtracting 1 from the immediate.
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ "$Pd = cmp.geu($Rs, #$s8)">;
+}
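+
+The rewrite is sound because x >= k and x > k - 1 agree for signed integers (the assembler still has to reject immediates whose decrement leaves the encodable range); a quick check:
+
+#include <cassert>
+#include <cstdint>
+
+// x >= k iff x > k - 1 for signed integers (no overflow for in-range k).
+int main() {
+  for (int32_t x : {-5, -1, 0, 1, 7})
+    for (int32_t k : {-4, 0, 3})
+      assert((x >= k) == (x > k - 1));
+}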
-def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = ct0($src1)",
- [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
//===----------------------------------------------------------------------===//
// ALU32/PRED -
@@ -845,7 +1025,8 @@ def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
//===----------------------------------------------------------------------===//
// ALU64/ALU +
-//===----------------------------------------------------------------------===//// Add.
+//===----------------------------------------------------------------------===//
+// Add.
//===----------------------------------------------------------------------===//
// Template Class
// Add/Subtract halfword
@@ -879,18 +1060,14 @@ class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
}
//Rd=sub(Rt.L,Rs.[LH])
-let isCodeGenOnly = 0 in {
def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
-}
-let isCodeGenOnly = 0 in {
//Rd=add(Rt.L,Rs.[LH])
def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
-}
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.L,Rs.[LH]):sat
def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
@@ -901,22 +1078,18 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
}
//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
-}
//Rd=add(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
-}
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
@@ -947,7 +1120,7 @@ def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
(A2_subh_h16_ll I32:$src1, I32:$src2)>;
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
(ins DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
@@ -981,12 +1154,10 @@ class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
let Inst{20-16} = !if(isMax, Rt, Rs);
}
-let isCodeGenOnly = 0 in {
def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-}
// Here, depending on the operand being selected, we'll either generate a
// min or max instruction.
@@ -1053,11 +1224,9 @@ class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-}
class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
: Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
@@ -1069,6 +1238,24 @@ def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+ let hasSideEffects = 0;
+
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0001;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
string Op2Pfx>
@@ -1096,10 +1283,8 @@ class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
: T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
IsComm, "">;
-let isCodeGenOnly = 0 in {
def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-}
def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
@@ -1109,11 +1294,9 @@ class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
: T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
!if(IsNeg,"~","")>;
-let isCodeGenOnly = 0 in {
def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-}
def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
@@ -1165,11 +1348,9 @@ class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-}
def: Pat<(i1 (not (i1 PredRegs:$Ps))),
(C2_not PredRegs:$Ps)>;
@@ -1193,13 +1374,11 @@ class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-}
def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
@@ -1207,7 +1386,7 @@ def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
"$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -1222,7 +1401,7 @@ def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
"$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -1234,18 +1413,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
let Inst{4-0} = Rd;
}
-def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2,
- PredRegs:$src3),
- "$dst = valignb($src1, $src2, $src3)",
- []>;
-
-def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2,
- PredRegs:$src3),
- "$dst = vspliceb($src1, $src2, $src3)",
- []>;
-
// User control register transfer.
//===----------------------------------------------------------------------===//
// CR -
@@ -1256,7 +1423,7 @@ def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
//===----------------------------------------------------------------------===//
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
@@ -1266,7 +1433,7 @@ class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
- string S = Mnemonic # !if(New, !if(Taken,":t",":nt"), "");
+ string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", ""));
}
let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
@@ -1304,7 +1471,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
let Inst{27-24} = 0b1100;
let Inst{21} = PredNot;
- let Inst{12} = !if(isPredNew, isTak, zero);
+ let Inst{12} = isTak;
let Inst{11} = isPredNew;
let Inst{9-8} = src;
let Inst{23-22} = dst{16-15};
@@ -1314,7 +1481,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
}
multiclass JMP_Pred<bit PredNot, string ExtStr> {
- def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>;
+ def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken
// Predicate new
def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
@@ -1361,13 +1528,13 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
let Inst{27-22} = 0b001101;
let Inst{21} = PredNot;
let Inst{20-16} = dst;
- let Inst{12} = !if(isPredNew, isTak, zero);
+ let Inst{12} = isTak;
let Inst{11} = isPredNew;
let Inst{9-8} = src;
}
multiclass JMPR_Pred<bit PredNot> {
- def NAME: T_JMPr_c<PredNot, 0, 0>;
+ def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
// Predicate new
def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
@@ -1404,12 +1571,12 @@ class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
}
-let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in {
+let Defs = VolatileV3.Regs in {
def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
}
-let isTerminator = 1, hasSideEffects = 0, isCodeGenOnly = 0 in {
+let isTerminator = 1, hasSideEffects = 0 in {
defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
// Deal with explicit assembly
@@ -1451,6 +1618,8 @@ def: Pat<(brind (i32 IntRegs:$dst)),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
+
+// Load - base with immediate-offset addressing mode
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
Operand ImmOp>
@@ -1471,7 +1640,7 @@ class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
!if (!eq(ImmOpStr, "s11_2Ext"), 13,
!if (!eq(ImmOpStr, "s11_1Ext"), 12,
/* s11_0Ext */ 11)));
- let hasNewValue = !if (!eq(ImmOpStr, "s11_3Ext"), 0, 1);
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let IClass = 0b1001;
@@ -1542,60 +1711,100 @@ multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
}
-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
}
-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
-def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
- (L2_loadrb_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
- (L2_loadrub_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
- (L2_loadrh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
- (L2_loadruh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (load ADDRriS11_2:$addr)),
- (L2_loadri_io AddrFI:$addr, 0) >;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>;
+ def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
+}
-def : Pat < (i64 (load ADDRriS11_3:$addr)),
- (L2_loadrd_io AddrFI:$addr, 0) >;
+let accessSize = WordAccess, opExtentAlign = 2 in {
+ def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
+ def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>;
+}
-let AddedComplexity = 20 in {
-def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (L2_loadrb_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
+ opExtendable = 3, isExtentSigned = 1 in
+class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
+ : LDInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#str#"($src2 + #$offset)", [],
+ "$src1 = $dst">, AddrModeRel {
+ bits<4> name;
+ bits<5> dst;
+ bits<5> src2;
+ bits<12> offset;
+ bits<11> offsetBits;
-def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (L2_loadrub_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
+ /* s11_0Ext */ offset{10-0});
+ let IClass = 0b1001;
-def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (L2_loadrh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
-def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (L2_loadruh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
+def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
-def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
- (L2_loadri_io IntRegs:$src1, s11_2ExtPred:$offset) >;
+let accessSize = ByteAccess, opExtentBits = 11 in
+def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
- (L2_loadrd_io IntRegs:$src1, s11_3ExtPred:$offset) >;
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
+ (VT (MI IntRegs:$Rs, imm:$Off))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
}
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ // No sextloadi1.
+}
+
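+
+The four patterns in Loadx_pat cover the addressing shapes the selector commonly sees: a frame index, frame index plus offset, register plus offset, and a bare register. The register-plus-offset case corresponds to ordinary indexed C loads, e.g. (illustration only):
+
+#include <cstdint>
+
+// Register-plus-offset shape, expected to select to L2_loadri_io:
+// roughly "r0 = memw(rP + #8)" for a word load at scaled offset 2.
+static int32_t load_base_off(const int32_t *P) {
+  return P[2];
+}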
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
+ (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
+
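+A self-contained check of the 0 - v trick on the only two possible loaded values:
+
+#include <cassert>
+#include <cstdint>
+
+// 0 - v replicates the low bit: 0 -> 0x00000000, 1 -> 0xffffffff.
+int main() {
+  assert((uint32_t)(0 - 0) == 0x00000000u);
+  assert((uint32_t)(0 - 1) == 0xffffffffu);
+}
+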
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
@@ -1696,51 +1905,70 @@ multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
}
// post increment byte loads with immediate offset
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
}
// post increment halfword loads with immediate offset
-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
}
// post increment word loads with immediate offset
-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
// post increment doubleword loads with immediate offset
-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
-def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
- (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
-
-// Load byte any-extend.
-def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
- (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
+// Rd=memb[u]h(Rx++#s4:1)
+// Rdd=memb[u]h(Rx++#s4:2)
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>;
+ def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
+}
+let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
+ def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+ def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+}
-// Indexed load byte any-extend.
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (L2_loadrb_io IntRegs:$src1, s11_0ImmPred:$offset)) >;
+//===----------------------------------------------------------------------===//
+// Template class for post increment fifo loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src2++#$offset)" ,
+ [], "$src2 = $dst2, $src1 = $dst" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> offset;
+ bits<4> offsetBits;
-def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
- (i32 (L2_loadrh_io AddrFI:$addr, 0))>;
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0});
+ let IClass = 0b1001;
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
- (i32 (L2_loadrh_io IntRegs:$src1, s11_1ImmPred:$offset)) >;
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-12} = 0b00;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
-let AddedComplexity = 10 in
-def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (L2_loadrub_io AddrFI:$addr, 0))>;
+// Ryy=memh_fifo(Rx++#s4:1)
+// Ryy=memb_fifo(Rx++#s4:0)
+let accessSize = ByteAccess in
+def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;
-let AddedComplexity = 20 in
-def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (L2_loadrub_io IntRegs:$src1, s11_0ImmPred:$offset))>;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;
//===----------------------------------------------------------------------===//
// Template class for post increment loads with register offset.
@@ -1768,26 +1996,27 @@ class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
let Inst{4-0} = dst;
}
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
+
+ def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
}
-let isCodeGenOnly = 0 in
def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;
// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-isPseudo = 1, Defs = [R10,R11,D5], hasSideEffects = 0 in
-def LDriw_pred : LDInst2<(outs PredRegs:$dst),
- (ins MEMri:$addr),
- "Error; should not emit",
- []>;
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins IntRegs:$addr, s11_2Ext:$off),
+ ".error \"should not emit\"", []>;
-let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0 in
+let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
def L2_deallocframe : LDInst<(outs), (ins),
"deallocframe",
[]> {
@@ -1799,7 +2028,7 @@ let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0
}
// Load / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+let Uses = [CS], hasSideEffects = 0 in
class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
: LDInst <(outs RC:$dst, IntRegs:$_dst_),
(ins IntRegs:$Rz, ModRegs:$Mu),
@@ -1809,6 +2038,7 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
bits<5> Rz;
bit Mu;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let IClass = 0b1001;
let Inst{27-25} = 0b100;
@@ -1821,27 +2051,60 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
let Inst{4-0} = dst;
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
}
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
+ def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>;
+ def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
}
-let accessSize = WordAccess, isCodeGenOnly = 0 in {
+let accessSize = WordAccess in {
def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
+ let hasNewValue = 0 in {
+ def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
+ def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>;
+ }
}
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
+// Load / Post increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0 in
+class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
+ : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+ "$Rz = $_dst_, $dst = $_src_" > {
+ bits<5> dst;
+ bits<5> Rz;
+ bit Mu;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b1;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
+
//===----------------------------------------------------------------------===//
// Circular loads with immediate offset.
//===----------------------------------------------------------------------===//
-let Uses = [CS], mayLoad = 1, hasSideEffects = 0, hasNewValue = 1 in
+let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in
class T_load_pci <string mnemonic, RegisterClass RC,
Operand ImmOp, bits<4> MajOp>
: LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
@@ -1855,6 +2118,7 @@ class T_load_pci <string mnemonic, RegisterClass RC,
bits<4> offsetBits;
string ImmOpStr = !cast<string>(ImmOp);
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
!if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
@@ -1871,24 +2135,62 @@ class T_load_pci <string mnemonic, RegisterClass RC,
}
// Byte variants of circ load
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
}
// Half word variants of circ load
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
+ def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
+ def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>;
}
// Word variants of circ load
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
-let accessSize = DoubleWordAccess, hasNewValue = 0, isCodeGenOnly = 0 in
+let accessSize = WordAccess, hasNewValue = 0 in {
+ def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+ def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+}
+
+let accessSize = DoubleWordAccess, hasNewValue = 0 in
def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
+//===----------------------------------------------------------------------===//
+// Circular loads - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions. The pseudo instructions'
+// operand order mimics the ordering in the intrinsics. Also, 'src2' doesn't
+// appear in the AsmString because it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pci_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>;
+def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>;
+def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>;
+def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>;
+def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>;
+def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>;
+
+
+// TODO: memb_fifo and memh_fifo must take the destination register as an
+// input operand.
+// One-off circular loads - not enough in common to justify a shared class.
+let accessSize = ByteAccess in
+def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
+
// L[24]_load[wd]_locked: Load word/double with lock.
let isSoloAX = 1 in
class T_load_locked <string mnemonic, RegisterClass RC>
@@ -1901,12 +2203,38 @@ class T_load_locked <string mnemonic, RegisterClass RC>
let Inst{27-21} = 0b0010000;
let Inst{20-16} = src;
let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
+ let Inst{5} = 0;
let Inst{4-0} = dst;
}
-let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
+
+// S[24]_store[wd]_locked: Store word/double conditionally.
+let isSoloAX = 1, isPredicateLate = 1 in
+class T_store_locked <string mnemonic, RegisterClass RC>
+ : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
+ mnemonic#"($Rs, $Pd) = $Rt"> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1010;
+ let Inst{27-23} = 0b00001;
+ let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
+
+let accessSize = WordAccess in
+def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
+
+let accessSize = DoubleWordAccess in
+def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
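+
+// The conditional store reports success or failure in $Pd; isPredicateLate
+// above marks the predicate as a late producer, so it cannot be consumed
+// as a .new predicate within the same packet.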
+
//===----------------------------------------------------------------------===//
// Bit-reversed loads with auto-increment register
//===----------------------------------------------------------------------===//
@@ -1936,17 +2264,44 @@ class T_load_pbr<string mnemonic, RegisterClass RC,
let Inst{4-0} = dst;
}
-let hasNewValue =1, opNewValue = 0, isCodeGenOnly = 0 in {
+let hasNewValue =1, opNewValue = 0 in {
def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
+ def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>;
+ def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
}
-let isCodeGenOnly = 0 in
+def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
+def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>;
def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
+def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
+def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs,
+ HalfWordAccess, 0b0010>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads - Pseudo
+//
+// Please note that 'src2' doesn't appear in the AsmString because
+// it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pbr_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ ".error \"$dst = "#opc#"($src1++$src3:brev)\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>;
+def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>;
+def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>;
+def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>;
+def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>;
+def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>;
+
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -2003,7 +2358,6 @@ class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
@@ -2012,10 +2366,8 @@ def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
-}
//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
@@ -2024,10 +2376,8 @@ def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
-}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
-let isCodeGenOnly = 0 in {
def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
@@ -2036,11 +2386,10 @@ def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
-}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
@@ -2094,7 +2443,6 @@ class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
}
//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
@@ -2103,10 +2451,8 @@ def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
-}
//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
@@ -2115,10 +2461,8 @@ def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
-}
//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
@@ -2127,10 +2471,8 @@ def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
-}
//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
@@ -2139,10 +2481,8 @@ def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
-}
//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
@@ -2151,10 +2491,8 @@ def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
-}
//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
@@ -2163,7 +2501,6 @@ def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
-}
//===----------------------------------------------------------------------===//
// Template Class
@@ -2197,7 +2534,6 @@ class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
let Inst{12-8} = Rt;
}
-let isCodeGenOnly = 0 in {
def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
@@ -2237,6 +2573,72 @@ def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply
+// Used for complex multiply (real or imaginary), dual multiply, and
+// multiply even halfwords.
+//===----------------------------------------------------------------------===//
+class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
+ bit isRnd, bit isSat >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")
+ #!if(isSat,":sat",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+let Defs = [USR_OVF] in {
+def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
+def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
+def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
+def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
+def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
+def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
+def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
+def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
+def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
+def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
+def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
+def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
+def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
+def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
+def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
}
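+
+// Encoding note: in the T_M2_vmpy definitions above, MajOp bit 2 selects
+// the :<<1 form and, for the rounding-capable vmpyw* variants, MajOp bit 0
+// selects the :rnd form.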
let hasNewValue = 1, opNewValue = 0 in
@@ -2265,6 +2667,9 @@ class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
let Inst{4-0} = dst;
}
+class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
+ : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
+
class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0 >
: T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
@@ -2277,30 +2682,37 @@ class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0, string op2str = "" >
: T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
-let CextOpcode = "mpyi", InputType = "reg", isCodeGenOnly = 0 in
+def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
+def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
+def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
+
+let CextOpcode = "mpyi", InputType = "reg" in
def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
-let isCodeGenOnly = 0 in {
def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
-}
-let isCodeGenOnly = 0 in
def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
-let isCodeGenOnly = 0 in {
+def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
+def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
+
def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
-}
+
+def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>;
+def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>;
+def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">;
+def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">;
// V4 Instructions
-let isCodeGenOnly = 0 in {
+def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>;
def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
+def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>;
def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-}
def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
@@ -2325,11 +2737,10 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
let Inst{12-5} = u8;
}
-let isExtendable = 1, opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>;
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
-let isCodeGenOnly = 0 in
def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
[(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
u8ImmPred:$u8)))]>;
@@ -2345,11 +2756,12 @@ def M2_mpyui : MInst<(outs IntRegs:$dst),
// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
// depending on the value of m9. See Arch Spec.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
- CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1 in
+ CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
+ isAsmParserOnly = 1 in
def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ExtPred:$src2))]>, ImmRegRel;
+ s32ImmPred:$src2))]>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2397,10 +2809,10 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
let Inst{4-0} = dst;
}
-let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
[(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u8ExtPred:$src3),
+ (add (mul IntRegs:$src2, u32ImmPred:$src3),
IntRegs:$src1))]>, ImmRegRel;
def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
@@ -2409,11 +2821,11 @@ let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
IntRegs:$src1))]>, ImmRegRel;
}
-let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in {
+let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3),
+ (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2422,20 +2834,18 @@ let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in {
(i32 IntRegs:$src1)))]>, ImmRegRel;
}
-let CextOpcode = "SUB_acc", isCodeGenOnly = 0 in {
+let CextOpcode = "SUB_acc" in {
let isExtentSigned = 1 in
def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel;
def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
}
-let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in
+let Itinerary = M_tc_3x_SLOT23 in
def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>;
-let isCodeGenOnly = 0 in {
def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-}
class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
PatLeaf ImmPred>
@@ -2447,10 +2857,187 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
(MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- XType Vector Instructions
+//===----------------------------------------------------------------------===//
+class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_diff < bits<3> MajOp, string opc >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss),
+ "$Rdd = "#opc#"($Rtt, $Rss)",
+ [], "",M_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32)
+def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>;
+def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>;
+def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>;
+def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>;
+
+def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
+def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
+
+// Vector reduce halfwords:
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
+def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
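+
+// The _acc variants reuse the MajOp/MinOp values of their non-accumulating
+// counterparts; only Inst{27-24} (0b1010 vs 0b1000) and the tied
+// accumulator input differ.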
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply (real or imaginary), dual multiply, and
+// multiply even halfwords.
+//===----------------------------------------------------------------------===//
+let Defs = [USR_OVF] in
+class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")#":sat",
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd",""),
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
+def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
+def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
+def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
+
+def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
+def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
+def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
+def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
+def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
+def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
+def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
+
+def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
+def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
+def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
+def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
+def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
+def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
+def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>;
+def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>;
+
//===----------------------------------------------------------------------===//
// Template Class -- Multiply signed/unsigned halfwords with and without
// saturation and rounding
@@ -2478,7 +3065,6 @@ class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
let Inst{12-8} = Rt;
}
-let isCodeGenOnly = 0 in {
def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
@@ -2509,7 +3095,7 @@ def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
-}
+
//===----------------------------------------------------------------------===//
// Template Class for xtype mpy:
// Vector multiply
@@ -2570,7 +3156,6 @@ class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
// MPY - Multiply and use full result
// Rdd = mpy[u](Rs,Rt)
-let isCodeGenOnly = 0 in {
def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
@@ -2579,7 +3164,48 @@ def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
-}
+
+// Complex multiply real or imaginary
+// Rxx=cmpy[ir](Rs,Rt)
+def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>;
+def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>;
+def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>;
+
+// Complex multiply
+// Rdd=cmpy(Rs,Rt)[:<<1]:sat
+def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>;
+def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>;
+
+// Rdd=cmpy(Rs,Rt*)[:<<1]:sat
+def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>;
+def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>;
+def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>;
+def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>;
+def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>;
+def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
+def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
+def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+//let Defs = [USR_OVF] in {
+ def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
+ def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
+//}
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
+def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
+def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
(i64 (anyext (i32 IntRegs:$src2))))),
@@ -2750,35 +3376,30 @@ multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
-let accessSize = HalfWordAccess, isNVStorable = 0, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess, isNVStorable = 0 in
defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
-def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerb_pi IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+ InstHexagon MI>
+ : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+ (MI I32:$src2, imm:$offset, Value:$src1)>;
-def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerh_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-
-def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
- (S2_storeri_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-
-def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerd_pi IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
@@ -2805,14 +3426,13 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
let Inst{7} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
-}
+
let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3>MajOp, bit isH = 0>
@@ -2906,7 +3526,7 @@ multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
+let addrMode = BaseImmOffset, InputType = "imm" in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
@@ -2925,42 +3545,102 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
u6_1Ext, 0b011, 1>;
}
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
- (S2_storerd_io AddrFI:$addr, 0, (i64 DoubleRegs:$src1))>;
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi),
+ (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Storex_fi_pat <Store, Value, MI>;
+ def: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
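+
+// For illustration, a use such as
+//   defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+// (see below) expands to three patterns:
+//   (store I32:$Rs, AddrFI:$fi)                 -> (S2_storeri_io $fi, 0, $Rs)
+//   (store I32:$Rs, (add AddrFI:$fi, imm:$Off)) -> (S2_storeri_io $fi, $Off, $Rs)
+//   (store I32:$Rt, (add I32:$Rs, imm:$Off))    -> (S2_storeri_io $Rs, $Off, $Rt)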
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that F.Fragment uses the operand
+// names "ptr" and "val".
+class SwapSt<PatFrag F>
+ : PatFrag<(ops node:$val, node:$ptr), F.Fragment>;
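+
+// For example, SwapSt<atomic_store_32> reuses atomic_store_32's fragment
+// but lists the operands as (value, address), which is the order the store
+// patterns above expect.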
+let AddedComplexity = 20 in {
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
-let AddedComplexity = 10 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_0ExtPred:$offset)),
- (S2_storerb_io IntRegs:$src2, s11_0ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
+}
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_1ExtPred:$offset)),
- (S2_storerh_io IntRegs:$src2, s11_1ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+// Simple patterns should be tried with the lowest priority.
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
+def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
+def: Storex_simple_pat<store, I32, S2_storeri_io>;
+def: Storex_simple_pat<store, I64, S2_storerd_io>;
-def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_2ExtPred:$offset)),
- (S2_storeri_io IntRegs:$src2, s11_2ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
-def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
- s11_3ExtPred:$offset)),
- (S2_storerd_io IntRegs:$src2, s11_3ImmPred:$offset,
- (i64 DoubleRegs:$src1))>;
+let AddedComplexity = 20 in {
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
}
-// memh(Rx++#s4:1)=Rt.H
+def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
+def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
+def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
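+
+// In the patterns above, LoReg extracts the low 32-bit subregister of the
+// i64 value, so each truncating store becomes a plain store of (part of)
+// the low word.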
// Store predicate.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
@@ -2971,7 +3651,7 @@ def STriw_pred : STInst<(outs),
// S2_allocframe: Allocate stack frame.
let Defs = [R29, R30], Uses = [R29, R31, R30],
- hasSideEffects = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+ hasSideEffects = 0, accessSize = DoubleWordAccess in
def S2_allocframe: ST0Inst <
(outs), (ins u11_3Imm:$u11_3),
"allocframe(#$u11_3)" > {
@@ -3015,7 +3695,6 @@ class T_store_pci <string mnemonic, RegisterClass RC,
let Inst{1} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
ByteAccess>;
def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
@@ -3026,12 +3705,64 @@ def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
WordAccess>;
def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
DoubleWordAccess>;
-}
+
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in
+class T_storenew_pci <string mnemonic, Operand Imm,
+ bits<2>MajOp, MemAccessSize AlignSize>
+ : NVInst < (outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new",
+ [],
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<6> offset;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{6-3} =
+ !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
+ !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
+ /* ByteAccess */ offset{3-0}));
+ let Inst{1} = 0b0;
+ }
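+
+// $Nt is a "new-value" operand: it names a value produced by another
+// instruction in the same packet (opNewValue gives its operand index),
+// and the 3-bit Nt field encodes which packet instruction produces it.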
+
+def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>;
+def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>;
+def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Circular stores - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions: the pseudo instructions'
+// operand order mimics the ordering in the intrinsics.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pci_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"",
+ [], "$_dst_ = $src1">;
+
+def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>;
+def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>;
+def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
// Circular stores with auto-increment register
//===----------------------------------------------------------------------===//
-let Uses = [CS], isNVStorable = 1, isCodeGenOnly = 0 in
+let Uses = [CS], isNVStorable = 1 in
class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
: STInst <(outs IntRegs:$_dst_),
@@ -3055,14 +3786,43 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
let Inst{1} = 0b1;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
HalfWordAccess, "Rt.h">;
-}
+
+//===----------------------------------------------------------------------===//
+// Circular .new stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_storenew_pcr <string mnemonic, bits<2>MajOp,
+ MemAccessSize AlignSize>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" ,
+ [] ,
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{1} = 0b1;
+ }
+
+def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;
//===----------------------------------------------------------------------===//
// Bit-reversed stores with auto-increment register
@@ -3093,7 +3853,7 @@ class T_store_pbr<string mnemonic, RegisterClass RC,
let Inst{12-8} = src;
}
-let isNVStorable = 1, isCodeGenOnly = 0 in {
+let isNVStorable = 1 in {
let BaseOpcode = "S2_storerb_pbr" in
def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
0b000>, NewValueRel;
@@ -3104,28 +3864,71 @@ let isNVStorable = 1, isCodeGenOnly = 0 in {
def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
0b100>, NewValueRel;
}
-let isCodeGenOnly = 0 in {
+
def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
-}
//===----------------------------------------------------------------------===//
-// ST -
+// Bit-reversed .new stores with auto-increment register
//===----------------------------------------------------------------------===//
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
+ hasSideEffects = 0 in
+class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
+ "$Rz = $_dst_">, NewValueRel {
+ let accessSize = addrSize;
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1111101;
+ let Inst{12-11} = majOp;
+ let Inst{7} = 0b0;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{10-8} = Nt;
+ }
+
+let BaseOpcode = "S2_storerb_pbr" in
+def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
+
+let BaseOpcode = "S2_storerh_pbr" in
+def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
+
+let BaseOpcode = "S2_storeri_pbr" in
+def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
//===----------------------------------------------------------------------===//
-// STYPE/ALU +
+// Bit-reversed stores - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions: the pseudo instructions'
+// operand order mimics the ordering in the intrinsics.
//===----------------------------------------------------------------------===//
-// Logical NOT.
-def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = not($src1)",
- [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pbr_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3),
+ ".error \""#opc#"($src1++$src3:brev) = $src2\"",
+ [], "$_dst_ = $src1">;
+def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>;
+def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>;
+def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
-// STYPE/ALU -
+// ST -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for S_2op instructions.
+//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
@@ -3156,26 +3959,62 @@ let hasNewValue = 1 in
class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
: T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
+// Vector sign/zero extend
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>;
+ def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>;
+ def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>;
+ def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>;
+}
+
+// Vector splat bytes/halfwords
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>;
+ def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>;
+}
+
// Sign extend word to doubleword
-let isCodeGenOnly = 0 in
def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+// Vector saturate and pack
+let Defs = [USR_OVF] in {
+ def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
+ def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>;
+ def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>;
+ def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>;
+ def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>;
+ def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>;
+}
+
+// Vector truncate
+def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>;
+def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>;
+
// Swizzle the bytes of a word
-let isCodeGenOnly = 0 in
def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
// Saturate
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
+ def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>;
}
-let Itinerary = S_2op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+let Itinerary = S_2op_tc_2_SLOT23 in {
+ // Vector round and pack
+ def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>;
+
+ let Defs = [USR_OVF] in
+ def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>;
+
+ // Bit reverse
+ def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>;
+
// Absolute value word
def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
@@ -3227,7 +4066,7 @@ class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
let hasNewValue = 1 in
class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
-
+
let hasNewValue = 1 in
class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
@@ -3239,21 +4078,33 @@ class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
[(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
(u5ImmPred:$u5)))]>;
+// Vector arithmetic shift right by immediate with truncate and pack
+def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
+
// Arithmetic/logical shift right/left by immediate
-let Itinerary = S_2op_tc_1_SLOT23, isCodeGenOnly = 0 in {
+let Itinerary = S_2op_tc_1_SLOT23 in {
def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
}
// Shift left by immediate with saturation
-let Defs = [USR_OVF], isCodeGenOnly = 0 in
+let Defs = [USR_OVF] in
def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
// Shift right with round
-let isCodeGenOnly = 0 in
def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
+let isAsmParserOnly = 1 in
+def S2_asr_i_r_rnd_goodsyntax
+ : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5),
+ "$dst = asrrnd($src, #$u5)",
+ [], "", S_2op_tc_1_SLOT23>;
+
+let isAsmParserOnly = 1 in
+def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
+ "$dst = not($src)">;
+
def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
(i32 1))),
(i32 1))),
@@ -3272,17 +4123,34 @@ class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
let Inst{4-0} = Rdd;
}
-let isCodeGenOnly = 0 in {
def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
-}
// Interleave/deinterleave
-let isCodeGenOnly = 0 in {
def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
-}
+
+// Vector Complex conjugate
+def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
+
+// Vector saturate without pack
+def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
+def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
+def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
+def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
+
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
+def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
+
+// Vector absolute value words with and without saturation
+def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
+def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
+
+def : Pat<(not (i64 DoubleRegs:$src1)),
+ (A2_notp DoubleRegs:$src1)>;
//===----------------------------------------------------------------------===//
// STYPE/BIT +
@@ -3313,7 +4181,6 @@ class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
: T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
(outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
-let isCodeGenOnly = 0 in {
def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
@@ -3323,14 +4190,28 @@ def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-}
-def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
-def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
-def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+// Count leading zeros.
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+
+// Count trailing ones: 32-bit.
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+
+// The 64-bit count-leading/trailing patterns are defined in
+// HexagonInstrInfoV4.td.
// Bit set/clear/toggle
@@ -3365,14 +4246,12 @@ class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-}
def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))),
(S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
@@ -3422,10 +4301,8 @@ class T_TEST_BIT_REG<string MnOp, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
-}
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
@@ -3437,6 +4314,7 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
(S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}
+
let hasSideEffects = 0 in
class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6),
@@ -3471,11 +4349,9 @@ class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-}
let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)),
@@ -3503,6 +4379,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
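+// Match a manual byte-by-byte assembly of a 32-bit word from four adjacent
+// byte loads and select a single word load whose bytes are rearranged with
+// A2_swiz (byte swizzle).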
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -3512,7 +4396,7 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
//===----------------------------------------------------------------------===//
// Predicate transfer.
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
"$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -3526,7 +4410,7 @@ def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
}
// Transfer general register to predicate.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
"$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
bits<2> Pd;
@@ -3538,6 +4422,27 @@ def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
let Inst{1-0} = Pd;
}
+let hasSideEffects = 0, isCodeGenOnly = 1 in
+def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
+ "$dst = $src">;
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+ (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load (i32 IntRegs:$Rs))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def I1toI32: OutPatFrag<(ops node:$Rs),
+ (C2_muxii (i1 $Rs), 1, 0)>;
+
+def I32toI1: OutPatFrag<(ops node:$Rs),
+ (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
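+
+// Resulting code sketch (editor's illustration; registers assumed):
+//   i1 load:  r1 = memub(r0+#0); p0 = r1             // L2_loadrub_io, C2_tfrrp
+//   i1 store: r1 = mux(p0, #1, #0); memb(r0+#0) = r1 // I1toI32, S2_storerb_io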
//===----------------------------------------------------------------------===//
// STYPE/PRED -
@@ -3570,15 +4475,12 @@ class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
}
// Shift by immediate.
-let isCodeGenOnly = 0 in {
def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
-}
// Shift left by small amount and add.
-let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0,
- isCodeGenOnly = 0 in
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
(ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3),
"$Rd = addasl($Rt, $Rs, #$u3)" ,
@@ -3627,8 +4529,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
//===----------------------------------------------------------------------===//
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
-let hasSideEffects = 1, isSoloAX = 1, isCodeGenOnly = 0 in
-def BARRIER : SYSInst<(outs), (ins),
+let hasSideEffects = 1, isSoloAX = 1 in
+def Y2_barrier : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
let Inst{31-28} = 0b1010;
@@ -3638,6 +4540,20 @@ def BARRIER : SYSInst<(outs), (ins),
//===----------------------------------------------------------------------===//
// SYSTEM/SUPER -
//===----------------------------------------------------------------------===//
+
+// Generate frameindex addresses. The main reason for the offset operand is
+// that every instruction that is allowed to have a frame index as an operand
+// will then have that operand followed by an immediate operand (the offset).
+// This simplifies the frame-index elimination code.
+//
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def TFR_FI : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32Imm:$off), "">;
+ def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">;
+}
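+
+// Sketch of the intent (hypothetical operands): with the offset operand in
+// place, frame-index elimination can fold "r1 = TFR_FI <fi#2>, #8" directly
+// into "r1 = add(r29, #N+8)" once the object's offset #N from r29 is known.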
+
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -3683,14 +4599,19 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
multiclass LOOP_ri<string mnemonic> {
def i : LOOP_iBase<mnemonic, brtarget>;
def r : LOOP_rBase<mnemonic, brtarget>;
+
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
+ def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
+ }
}
-let Defs = [SA0, LC0, USR], isCodeGenOnly = 0 in
+let Defs = [SA0, LC0, USR] in
defm J2_loop0 : LOOP_ri<"loop0">;
// Interestingly, only loop0 instructions appear to set usr.lpcfg.
-let Defs = [SA1, LC1], isCodeGenOnly = 0 in
+let Defs = [SA1, LC1] in
defm J2_loop1 : LOOP_ri<"loop1">;
let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
@@ -3751,12 +4672,40 @@ multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
def r : SPLOOP_rBase<mnemonic, op>;
}
-let isCodeGenOnly = 0 in {
defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
+
+// if (Rs[!>=<]=#0) jump:[t/nt]
+let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0 in
+class J2_jump_0_Base<string compare, bit isTak, bits<2> op>
+ : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2),
+ "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > {
+ bits<5> Rs;
+ bits<15> r13_2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-24} = 0b0001;
+ let Inst{23-22} = op;
+ let Inst{12} = isTak;
+ let Inst{21} = r13_2{14};
+ let Inst{20-16} = Rs;
+ let Inst{11-1} = r13_2{12-2};
+ let Inst{13} = r13_2{13};
+ }
+
+multiclass J2_jump_compare_0<string compare, bits<2> op> {
+ def NAME : J2_jump_0_Base<compare, 0, op>;
+ def NAME#pt : J2_jump_0_Base<compare, 1, op>;
}
+defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>;
+defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>;
+defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>;
+defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>;
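+
+// Example syntax (editor's sketch; register and labels assumed):
+//   if (r17!=#0) jump:t .LBB0_3     // J2_jumprzpt
+//   if (r17<=#0) jump:nt .LBB0_5    // J2_jumprltez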
+
// Transfer to/from Control/GPR Guest/GPR
let hasSideEffects = 0 in
class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
@@ -3773,8 +4722,9 @@ class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
let Inst{20-16} = src;
let Inst{4-0} = dst;
}
-let isCodeGenOnly = 0 in
+
def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
+def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>;
def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
@@ -3794,13 +4744,14 @@ class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
let Inst{4-0} = dst;
}
-let hasNewValue = 1, opNewValue = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, opNewValue = 0 in
def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
+def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>;
def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
// Y4_trace: Send value to etm trace.
-let isSoloAX = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isSoloAX = 1, hasSideEffects = 0 in
def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
"trace($Rs)"> {
bits<5> Rs;
@@ -3810,350 +4761,201 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
let Inst{20-16} = Rs;
}
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- s12ImmPred:$src3)))]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- s12ImmPred:$src3)))]>;
-
-// Generate frameindex addresses.
-let isReMaterializable = 1 in
-def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
- "$dst = add($src1)",
- [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
-
// Support for generating global address.
// Taken from X86InstrInfo.td.
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [
- SDTCisVT<0, i32>,
- SDTCisVT<1, i32>,
- SDTCisPtrTy<0>]>;
-def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
-def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
// HI/LO Instructions
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.l = #LO($global)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.h = #HI($global)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
+ hasNewValue = 1, opNewValue = 0 in
+class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp>
+ : ALU32_ri<(outs IntRegs:$dst),
+ (ins i32imm:$imm_value),
+ "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> {
+ bits<5> dst;
+ bits<32> imm_value;
+ let IClass = 0b0111;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.l = #LO($imm_value)",
- []>;
+ let Inst{27} = Rs;
+ let Inst{26-24} = MajOp;
+ let Inst{21} = MinOp;
+ let Inst{20-16} = dst;
+ let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30});
+ let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16});
+}
+let isAsmParserOnly = 1 in {
+ def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>;
+ def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>;
+ def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>;
+ def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>;
+}
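+
+// Assumed semantics of the half-word transfers (editor's sketch):
+//   r0.l = #LO(0x12345678)   // r0[15:0]  = 0x5678
+//   r0.h = #HI(0x12345678)   // r0[31:16] = 0x1234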
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.h = #HI($imm_value)",
- []>;
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.l = #LO($label@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.l = #LO($jt)",
- []>;
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.h = #HI($label@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.h = #HI($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOT)",
+ []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.l = #LO($label)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1 , hasSideEffects = 0 in
-def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.h = #HI($label)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOTREL)",
+ []>;
// This pattern is incorrect. When we add small data, we should change
// this pattern to use memw(#foo).
// This is for sdata.
-let isMoveImm = 1 in
-def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+let isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst),
(load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
-// This is for non-sdata.
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst = CONST32(#$jt)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tjumptable:$jt))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32_GP tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
-// Map BlockAddress lowering to CONST32_Int_Real
-def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
- (CONST32_Int_Real tblockaddress:$addr)>;
+// Map TLS addresses and basic-block labels to transfer-immediate (A2_tfrsi).
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst = CONST32($label)",
- [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
"$dst = CONST64(#$global)",
- [(set (i64 DoubleRegs:$dst), imm:$global) ]>;
+ [(set (i64 DoubleRegs:$dst), imm:$global)]>;
-def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
- "$dst = xor($dst, $dst)",
- [(set (i1 PredRegs:$dst), 0)]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "",
+ [(set (i1 PredRegs:$dst), 1)]>;
-def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpy($src1, $src2)",
- [(set (i32 IntRegs:$dst),
- (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))),
- (i64 (sext (i32 IntRegs:$src2))))),
- (i32 32)))))]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)",
+ [(set (i1 PredRegs:$dst), 0)]>;
// Pseudo instructions.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-
-def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-
-def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
// For tail calls, a HexagonTCRet SDNode has three properties: a chain,
// an optional glue operand, and variadic arguments.
// Its single operand has pointer type.
-def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-let Defs = [R29, R30], Uses = [R31, R30, R29] in {
- def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- "Should never be emitted",
- [(callseq_start timm:$amt)]>;
-}
+let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ ".error \"should not emit\" ",
+ [(callseq_start timm:$amt)]>;
-let Defs = [R29, R30, R31], Uses = [R29] in {
- def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "Should never be emitted",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
-}
-// Call subroutine.
-let isCall = 1, hasSideEffects = 0,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALL : JInst<(outs), (ins calltarget:$dst),
- "call $dst", []>;
-}
+let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ ".error \"should not emit\" ",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
// Call subroutine indirectly.
-let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in
+let Defs = VolatileV3.Regs in
def J2_callr : JUMPR_MISC_CALLR<0, 1>;
// Indirect tail-call.
-let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in
-def TCRETURNR : T_JMPr;
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNr : T_JMPr;
// Direct tail-calls.
-let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
-isTerminator = 1, isCodeGenOnly = 1 in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
-}
-
-// Map call instruction.
-def : Pat<(call (i32 IntRegs:$dst)),
- (J2_callr (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
-def : Pat<(call tglobaladdr:$dst),
- (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
-def : Pat<(call texternalsym:$dst),
- (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
-//Tail calls.
-def : Pat<(HexagonTCRet tglobaladdr:$dst),
- (TCRETURNtg tglobaladdr:$dst)>;
-def : Pat<(HexagonTCRet texternalsym:$dst),
- (TCRETURNtext texternalsym:$dst)>;
-def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
- (TCRETURNR (i32 IntRegs:$dst))>;
-
-// Atomic load and store support
-// 8 bit atomic load
-def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
- (i32 (L2_loadrub_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
- (i32 (L2_loadrub_io (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-// 16 bit atomic load
-def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
- (i32 (L2_loadruh_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
- (i32 (L2_loadruh_io (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
- (i32 (L2_loadri_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
- (i32 (L2_loadri_io (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
-// 64 bit atomic load
-def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
- (i64 (L2_loadrd_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
- (i64 (L2_loadrd_io (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-
-
-def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
- (S2_storerb_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (S2_storerb_io (i32 IntRegs:$src2), s11_0ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
- (S2_storerh_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
- (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
- (S2_storerh_io (i32 IntRegs:$src2), s11_1ImmPred:$offset,
- (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
- (S2_storeri_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (S2_storeri_io (i32 IntRegs:$src2), s11_2ImmPred:$offset,
- (i32 IntRegs:$src1))>;
-
-
-
-
-def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
- (S2_storerd_io AddrFI:$src2, 0, (i64 DoubleRegs:$src1))>;
-
-def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (S2_storerd_io (i32 IntRegs:$src2), s11_3ImmPred:$offset,
- (i64 DoubleRegs:$src1))>;
+//Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNi tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNi texternalsym:$dst)>;
+def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+ (TCRETURNr IntRegs:$dst)>;
// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
-def : Pat <(and (i32 IntRegs:$src1), 65535),
- (A2_zxth (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 65535),
+ (A2_zxth IntRegs:$src1)>;
// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def : Pat <(and (i32 IntRegs:$src1), 255),
- (A2_zxtb (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 255),
+ (A2_zxtb IntRegs:$src1)>;
// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced;
// if it is, it must be mapped to a NOOP.
-def : Pat <(add (i1 PredRegs:$src1), -1),
- (C2_not (i1 PredRegs:$src1))>;
+def: Pat<(add (i1 PredRegs:$src1), -1),
+ (C2_not PredRegs:$src1)>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
- (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
- s8ImmPred:$src2))>;
+def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = TFR_condset_ri(p0, r1, #i)
-def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
- (i32 IntRegs:$src3)),
- (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
- s12ImmPred:$src2))>;
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
- (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
- (i32 IntRegs:$src2)))>;
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
- (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
-
-// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
-def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
- (i1 (C2_andn (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
-
-
-let AddedComplexity = 100 in
-def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
- (i64 (A2_combinew (A2_tfrsi 0),
- (L2_loadrub_io (CONST32_set tglobaladdr:$global), 0)))>,
- Requires<[NoV4T]>;
-
-// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
-let AddedComplexity = 10 in
-def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (A2_and (i32 (L2_loadrb_io AddrFI:$addr, 0)), (A2_tfrsi 0x1)))>;
+def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
- (i64 (A2_sxtw (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (A2_sxtw (LoReg DoubleRegs:$src1))>;
-// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(SXTH(Rss.lo)).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
- (i64 (A2_sxtw (i32 (A2_sxth (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg))))))>;
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
-// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(SXTB(Rss.lo)).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
- (i64 (A2_sxtw (i32 (A2_sxtb (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg))))))>;
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
// We want to avoid emitting pnot instructions as much as possible.
// Map brcond with an unsupported setcc to a J2_jumpf.
@@ -4166,144 +4968,68 @@ def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
bb:$offset),
(J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
-def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
-def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (J2_jumpt (i1 PredRegs:$src1), bb:$offset)>;
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpgti (i32 IntRegs:$src1),
- (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>;
-
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
-
-def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- bb:$offset)>;
+def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
+ bb:$offset)>;
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes; so emulate with combines.
-def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src3)),
- (i64 (A2_combinew (i32 (C2_mux (i1 PredRegs:$src1),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
- subreg_hireg)))),
- (i32 (C2_mux (i1 PredRegs:$src1),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
- subreg_loreg))))))>;
+def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+ (HiReg DoubleRegs:$src3)),
+ (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+ (LoReg DoubleRegs:$src3)))>;
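+
+// Expansion sketch (editor's illustration; registers assumed):
+//   r5 = mux(p0, r1, r3)      // high words
+//   r4 = mux(p0, r0, r2)      // low words
+//   r5:4 = combine(r5, r4)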
// Map from a 1-bit select to logical ops.
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
-def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2),
- (i1 PredRegs:$src3)),
- (C2_or (C2_and (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
- (C2_and (C2_not (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
-
-// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
-def : Pat<(i1 (load ADDRriS11_2:$addr)),
- (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>;
+def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
+ (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
// Map for truncating i64 values to i32 values.
-def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>;
+def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (LoReg DoubleRegs:$src)>;
// Map for truncating i64 values to i1 values.
-def : Pat<(i1 (trunc (i64 DoubleRegs:$src))),
- (i1 (C2_tfrrp (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg))))>;
-
-// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
-def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
-def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-
-// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
-def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>;
-
-// Map Rdd = anyext(Rs) -> Rdd = A2_sxtw(Rs).
-// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
-// Better way to do this?
-def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
- (i64 (A2_sxtw (i32 IntRegs:$src1)))>;
-
-// Map cmple -> cmpgt.
+def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Rss <= Rtt -> !(Rss > Rtt).
-def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtp (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
// Map cmpne -> cmpeq.
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (C2_not(i1 (C2_cmpeqi (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
-
-// Map cmpne(Rs) -> !cmpeqe(Rs).
-// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
// Convert setne back to xor for hexagon since we compute w/ pred registers.
-def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
- (i1 (C2_xor (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>;
// Map cmpne(Rss) -> !cmpew(Rss).
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeqp (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))))>;
+def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
// rs >= rt -> !(rt > rs).
@@ -4311,366 +5037,120 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
-def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
-def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpgtp (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1)))))>;
+def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
-
-// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
-// rss < rtt -> (rtt > rss).
-def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_cmpgtp (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
-
-// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
-// rs < rt -> rt > rs.
-def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
- (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
- (i1 (C2_cmpgtui (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
// Generate cmpgtu(Rs, #u9)
-def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
- (i1 (C2_cmpgtui (i32 IntRegs:$src1), u9ExtPred:$src2))>;
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
-
-// Map from Rs >= Rt -> !(Rt > Rs).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
-
-// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
-// Map from (Rs <= Rt) -> !(Rs > Rt).
-def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
// Map from (Rss <= Rtt) -> !(Rss > Rtt).
-def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
// Sign extends.
// i1 -> i32
-def : Pat <(i32 (sext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), -1, 0))>;
+def: Pat<(i32 (sext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, -1, 0)>;
// i1 -> i64
-def : Pat <(i64 (sext (i1 PredRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i8 -> i64
-def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
-
-// Convert any-extended load back to load and sign extend.
-// i8 -> i64
-def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i16 -> i64
-def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
- (i64 (A2_sxtw (L2_loadrh_io AddrFI:$src1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i32 -> i64
-def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>;
-
+def: Pat<(i64 (sext (i1 PredRegs:$src1))),
+ (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
// Zero extends.
// i1 -> i32
-def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// i1 -> i64
-def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
- Requires<[NoV4T]>;
-
-// i32 -> i64
-def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
- Requires<[NoV4T]>;
-
-// i8 -> i64
-def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i1 -> i64
-def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i16 -> i64
-def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i32 -> i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 10 in
-def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
- (i32 (L2_loadri_io AddrFI:$src1, 0))>;
+def: Pat<(i32 (zext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
// Map from Rd = Pd to Rd = mux(Pd, #1, #0)
-def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// Map from Rss = Pd to Rdd = A2_sxtw (mux(Pd, #1, #0))
-def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
- (i64 (A2_sxtw (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))))>;
-
-
-let AddedComplexity = 100 in
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 (i32 (add IntRegs:$src2,
- s11_2ExtPred:$offset2)))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io IntRegs:$src2,
- s11_2ExtPred:$offset2)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- IntRegs:$srcLow))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 (i32 (add IntRegs:$src2,
- s11_2ExtPred:$offset2)))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io IntRegs:$src2,
- s11_2ExtPred:$offset2)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- IntRegs:$srcLow))>;
-
-// Any extended 64-bit load.
-// anyext i32 -> i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-// When there is an offset we should prefer the pattern below over the pattern above.
-// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc)
-// So this complexity below is comfortably higher to allow for choosing the below.
-// If this is not done then we generate addresses such as
-// ********************************************
-// r1 = add (r0, #4)
-// r1 = memw(r1 + #0)
-// instead of
-// r1 = memw(r0 + #4)
-// ********************************************
-let AddedComplexity = 100 in
-def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// anyext i16 -> i64.
-def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
+def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
-let AddedComplexity = 20 in
-def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
-def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
- Requires<[NoV4T]>;
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0))
+def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
+ (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (A2_combinew
- (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_s0
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))), 32)),
- subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
- 32)), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
-
-// Multiply 64-bit signed and use upper result.
-def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_s0
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))), 32)),
- subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
- 32)), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32),
+ (HiReg $src1),
+ (LoReg $src2)),
+ (A2_combinew (A2_tfrsi 0),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))),
+ 32),
+ (HiReg $src1),
+ (HiReg $src2)),
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32)
+)>;
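+
+// Schoolbook decomposition used above (editor's sketch), writing
+// $src1 = xH:xL and $src2 = yH:yL (32-bit halves):
+//   mulhu = xH*yH + hi32(xL*yH)
+//         + hi32( hi32(xL*yL) + xH*yL + zext(lo32(xL*yH)) )
+// Splitting xL*yH into halves keeps the inner 64-bit sum from overflowing
+// before the final right shift by 32.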
// Hexagon specific ISD nodes.
-//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
-def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
- SDTHexagonADJDYNALLOC>;
-// Needed to tag these instructions for stack layout.
-let usesCustomInserter = 1 in
-def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
- s16Imm:$src2),
- "$dst = add($src1, #$src2)",
- [(set (i32 IntRegs:$dst),
- (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1),
- s16ImmPred:$src2))]>;
+def SDTHexagonALLOCA : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
+ [SDNPHasChain]>;
+
+// The reason for the custom inserter is to record all ALLOCA instructions
+// in MachineFunctionInfo.
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1,
+ usesCustomInserter = 1 in
+def ALLOCA: ALU32Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32Imm:$A), "",
+ [(set (i32 IntRegs:$Rd),
+ (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>;
+
+let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
+def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>;
def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+let isCodeGenOnly = 1 in
def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = $src1",
[(set (i32 IntRegs:$dst),
(Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
let AddedComplexity = 100 in
-def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
- (COPY (i32 IntRegs:$src1))>;
+def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
+ (i32 IntRegs:$src1)>;
-def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-def : Pat<(HexagonWrapperJT tjumptable:$dst),
- (i32 (CONST32_set_jt tjumptable:$dst))>;
+def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>;
+def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>;
// XTYPE/SHIFT
//
@@ -4820,7 +5300,6 @@ let AddedComplexity = 100 in
defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
}
-let isCodeGenOnly = 0 in {
defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
@@ -4828,7 +5307,6 @@ defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
xtype_xor_imm_acc<"asl", shl, 0b10>;
-}
multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
let AddedComplexity = 100 in
@@ -4854,12 +5332,10 @@ multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
}
-let isCodeGenOnly = 0 in {
defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
-}
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
@@ -4890,9 +5366,42 @@ class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
: T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
isSat, isRnd, hasShift>;
-let isCodeGenOnly = 0 in
+let Itinerary = S_3op_tc_1_SLOT23 in {
+ def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>;
+ def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>;
+ def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>;
+ def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>;
+
+ def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>;
+ def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>;
+}
+
def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
+let hasSideEffects = 0 in
+class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps>
+ : SInst < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)",
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<2> Pu;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>;
+def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>;
+
//===----------------------------------------------------------------------===//
// Template class used by vector shift, vector rotate, vector neg,
// 32-bit shift, 64-bit shifts, etc.
@@ -4943,31 +5452,78 @@ class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
// Shift by register
// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
-let isCodeGenOnly = 0 in {
def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
-}
// Rd=[asr|lsr|asl|lsl](Rs,Rt)
-let isCodeGenOnly = 0 in {
def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
-}
// Shift by register with saturation
// Rd=asr(Rs,Rt):sat
// Rd=asl(Rs,Rt):sat
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
}
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd,
+                bit hasShift, bit hasSplat = 0>
+ : SInst < (outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")"
+ #!if(hasShift, ":<<1", "")
+ #!if(isRnd, ":rnd", "")
+ #!if(isSat, ":sat", ""),
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>;
+
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
+
+let hasSideEffects = 0 in
+class T_S3op_7 <string mnemonic, bit MajOp >
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<3> u3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23} = MajOp;
+ let Inst{20-16} = !if(MajOp, Rss, Rtt);
+ let Inst{12-8} = !if(MajOp, Rtt, Rss);
+ let Inst{7-5} = u3;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignib : T_S3op_7 < "valignb", 0>;
+def S2_vspliceib : T_S3op_7 < "vspliceb", 1>;
+
//===----------------------------------------------------------------------===//
// Template class for 'insert bitfield' instructions
//===----------------------------------------------------------------------===//
@@ -5021,17 +5577,45 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
// Rx=insert(Rs,Rtt)
// Rx=insert(Rs,#u5,#U5)
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>;
}
// Rxx=insert(Rss,Rtt)
// Rxx=insert(Rss,#u6,#U6)
-let isCodeGenOnly = 0 in {
def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
-}
+
+
+def SDTHexagonINSERT:
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTHexagonINSERTRP:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i64>]>;
+
+def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
+def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
+
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
+def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
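+
+// Assumed semantics behind these patterns (editor's note):
+//   insert(Rx, Rs, #w, #off) replaces Rx[off+w-1:off] with Rs[w-1:0].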
+
+let AddedComplexity = 100 in
+def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
+ (i32 (extloadi8 (add I32:$b, 3))),
+ 24, 8),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io I32:$b, 0))>;
+
//===----------------------------------------------------------------------===//
// Template class for 'extract bitfield' instructions
@@ -5089,18 +5673,39 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
// Rdd=extractu(Rss,Rtt)
// Rdd=extractu(Rss,#u6,#U6)
-let isCodeGenOnly = 0 in {
def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>;
-}
// Rd=extractu(Rs,Rtt)
// Rd=extractu(Rs,#u5,#U5)
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
}
+def SDTHexagonEXTRACTU:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTURP:
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i64>]>;
+
+def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
+def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
+
+def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
+ (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
+ (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
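+// For reference, the unsigned-extract semantics selected above, as a C-level
+// sketch (illustrative only, assuming 0 < w < 32) of extractu(x, #w, #o):
+//   uint32_t extractu(uint32_t x, unsigned w, unsigned o) {
+//     return (x >> o) & ((1u << w) - 1);
+//   }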
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
+ (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
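+// Example: (mul %x, -3) becomes Rd = -mpyi(Rs, #3). M2_mpysin computes
+// -(Rs * #u8), so flipping the immediate's sign (n8ImmPred matched,
+// u8ImmPred emitted) yields the same value with an unsigned-immediate
+// encoding.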
+
//===----------------------------------------------------------------------===//
// :raw form of tableidx[bdhw] insns
//===----------------------------------------------------------------------===//
@@ -5127,16 +5732,26 @@ class tableidxRaw<string OpStr, bits<2>MinOp>
let Inst{4-0} = Rx;
}
-let isCodeGenOnly = 0 in {
def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
-}
-// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
-def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
- (i32 (M2_mpysin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+//===----------------------------------------------------------------------===//
+// Template class for 'table index' instructions which are assembler mapped
+// to their :raw format.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in
+class tableidx_goodsyntax <string mnemonic>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
+ [], "$Rx = $_dst_" >;
+
+def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
+def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
+def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
+def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
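+// The _goodsyntax pseudos exist so the assembler can accept the architected
+// "tableidx[bhwd]" syntax; they are rewritten later (with the offset
+// immediate adjusted as needed) into the :raw encodings defined above.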
//===----------------------------------------------------------------------===//
// V3 Instructions +
@@ -5167,3 +5782,9 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
// V5 Instructions -
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/64/Vector +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoVector.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 8e9147600fa6..84d035da451b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -21,8 +21,7 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
// J +
//===----------------------------------------------------------------------===//
// Call subroutine.
-let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
- Defs = VolatileV3.Regs, isPredicable = 1,
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1,
isExtended = 0, isExtendable = 1, opExtendable = 0,
isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
class T_Call<string ExtStr>
@@ -37,8 +36,7 @@ class T_Call<string ExtStr>
let Inst{0} = 0b0;
}
-let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
- Defs = VolatileV3.Regs, isPredicated = 1,
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1,
isExtended = 0, isExtendable = 1, opExtendable = 1,
isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
class T_CallPred<bit IfTrue, string ExtStr>
@@ -64,9 +62,11 @@ multiclass T_Calls<string ExtStr> {
def f : T_CallPred<0, ExtStr>;
}
-let isCodeGenOnly = 0 in
defm J2_call: T_Calls<"">, PredRel;
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in
+def CALLv3nr : T_Call<"">, PredRel;
+
//===----------------------------------------------------------------------===//
// J -
//===----------------------------------------------------------------------===//
@@ -76,13 +76,10 @@ defm J2_call: T_Calls<"">, PredRel;
// JR +
//===----------------------------------------------------------------------===//
// Call subroutine from register.
-let isCall = 1, hasSideEffects = 0,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
- P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst),
- "callr $dst",
- []>, Requires<[HasV3TOnly]>;
- }
+
+let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
+ def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+}
//===----------------------------------------------------------------------===//
// JR -
@@ -92,20 +89,16 @@ let isCall = 1, hasSideEffects = 0,
// ALU64/ALU +
//===----------------------------------------------------------------------===//
-
-let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23,
- validSubTargets = HasV3SubT, isCodeGenOnly = 0 in
+let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in
def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
: T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
-let isCodeGenOnly = 0 in {
def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
-}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0, isAsmParserOnly = 1 in
def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
(ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
[(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
@@ -134,12 +127,10 @@ class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-}
multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
@@ -164,25 +155,112 @@ let AddedComplexity = 200 in {
//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
// Map call instructions.
-def : Pat<(call (i32 IntRegs:$dst)),
- (J2_call (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
-def : Pat<(call tglobaladdr:$dst),
- (J2_call tglobaladdr:$dst)>, Requires<[HasV3T]>;
-def : Pat<(call texternalsym:$dst),
- (J2_call texternalsym:$dst)>, Requires<[HasV3T]>;
+def : Pat<(callv3 (i32 IntRegs:$dst)),
+ (J2_callr (i32 IntRegs:$dst))>;
+def : Pat<(callv3 tglobaladdr:$dst),
+ (J2_call tglobaladdr:$dst)>;
+def : Pat<(callv3 texternalsym:$dst),
+ (J2_call texternalsym:$dst)>;
+def : Pat<(callv3 tglobaltlsaddr:$dst),
+ (J2_call tglobaltlsaddr:$dst)>;
+
+def : Pat<(callv3nr (i32 IntRegs:$dst)),
+ (CALLRv3nr (i32 IntRegs:$dst))>;
+def : Pat<(callv3nr tglobaladdr:$dst),
+ (CALLv3nr tglobaladdr:$dst)>;
+def : Pat<(callv3nr texternalsym:$dst),
+ (CALLv3nr texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// :raw form of vrcmpys:hi/lo insns
+//===----------------------------------------------------------------------===//
+// Vector reduce complex multiply by scalar.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpRaw<string HiLo, bits<3>MajOp>:
+ MInst<(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rdd;
+}
+
+def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>;
+def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l
+let hasSideEffects = 0, isAsmParserOnly = 1 in
+def M2_vrcmpys_s1
+ : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">;
+
+// Vector reduce complex multiply by scalar with accumulation.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpys_acc<string HiLo, bits<3>MajOp>:
+ MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [],
+ "$Rxx = $_src_"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rxx;
+ }
+
+def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>;
+def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_acc_s1
+ : MInst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+ "$dst += vrcmpys($src1, $src2):<<1:sat", [],
+ "$dst2 = $dst">;
+
+def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>;
+def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>;
+
+// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_s1rp
+ : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">;
+
+
+// S2_cabacdecbin: CABAC decode bin.
+let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in
+def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 08bfd676fed8..8b667c645156 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,6 +11,28 @@
//
//===----------------------------------------------------------------------===//
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
+
+def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
+def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
+
let hasSideEffects = 0 in
class T_Immext<Operand ImmType>
: EXTENDERInst<(outs), (ins ImmType:$imm),
@@ -35,19 +57,9 @@ def BITPOS32 : SDNodeXForm<imm, [{
// Return the bit position we will set [0-31].
// As an SDNode.
int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
-// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
-def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
-
-// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
-def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
-
-def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
- (HexagonCONST32 node:$addr), [{
- return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
-}]>;
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
@@ -119,21 +131,19 @@ class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
}
-let BaseOpcode = "andn_rr", CextOpcode = "andn", isCodeGenOnly = 0 in
+let BaseOpcode = "andn_rr", CextOpcode = "andn" in
def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
-let BaseOpcode = "orn_rr", CextOpcode = "orn", isCodeGenOnly = 0 in
+let BaseOpcode = "orn_rr", CextOpcode = "orn" in
def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
-let CextOpcode = "rcmp.eq", isCodeGenOnly = 0 in
+let CextOpcode = "rcmp.eq" in
def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
-let CextOpcode = "!rcmp.eq", isCodeGenOnly = 0 in
+let CextOpcode = "!rcmp.eq" in
def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
-let isCodeGenOnly = 0 in {
def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-}
// Pats for instruction selection.
@@ -146,11 +156,15 @@ class CmpInReg<PatFrag Op>
def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
+def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
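+// RevCmp swaps the comparison operands: setuge(a, b) is setule(b, a), so the
+// one C4_cmplteu instruction covers both operand orders.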
+
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "reg";
let CextOpcode = mnemonic;
let isCompare = 1;
@@ -169,13 +183,26 @@ class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
+
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}
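+// These match the usual C idiom for comparing only the low byte or halfword
+// of two registers (illustrative):
+//   ((x ^ y) & 0xff) == 0     // low bytes equal      -> A4_cmpbeq
+//   ((x ^ y) & 0xffff) != 0   // low halfwords differ -> not(A4_cmpheq)
+// since x ^ y has zero bits exactly where x and y agree.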
class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
@@ -183,7 +210,6 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
: ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
"$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "imm";
let CextOpcode = mnemonic;
let isCompare = 1;
@@ -208,19 +234,17 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>;
def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>;
def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>;
def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>;
def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>;
def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>;
-}
+
class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8),
"$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "imm";
let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
let isExtendable = 1;
@@ -243,22 +267,19 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-}
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>;
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
// Preserve the S2_tstbit_r generation
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
(i32 IntRegs:$src1))), 0)))),
(C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
-
//===----------------------------------------------------------------------===//
// ALU32 -
//===----------------------------------------------------------------------===//
@@ -286,26 +307,23 @@ class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
let Inst{4-0} = Rdd;
}
-let opExtendable = 2, isCodeGenOnly = 0 in
+let opExtendable = 2 in
def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8),
"$Rdd = combine($Rs, #$s8)">;
-let opExtendable = 1, isCodeGenOnly = 0 in
+let opExtendable = 1 in
def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-def HexagonWrapperCombineRI_V4 :
- SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineIR_V4 :
- SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
-def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
- (A4_combineri IntRegs:$r, s8ExtPred:$i)>,
- Requires<[HasV4T]>;
-
-def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
- (A4_combineir s8ExtPred:$i, IntRegs:$r)>,
- Requires<[HasV4T]>;
+def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
+}
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
@@ -323,6 +341,12 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
let Inst{4-0} = Rdd;
}
+// The complexity of the combine with two immediates should be greater than
+// the complexity of a combine involving a register.
+let AddedComplexity = 75 in
+def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
+ (A4_combineii imm:$s8, imm:$u6)>;
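+// With these complexities, combine(#1, #2) selects A4_combineii (75) ahead
+// of the immediate/register forms (50), which in turn beat the plain
+// register-register combine: the more specific encoding wins.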
+
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -330,24 +354,182 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
+
+def Zext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def Sext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
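+// Zext64/Sext64 are output-side fragments: they expand only in the result of
+// a pattern, so a widening i64 load can reuse the ordinary 32-bit load
+// instruction. For example, an i64 zextloadi8 below becomes L2_loadrub_io
+// wrapped in A4_combineir 0.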
+
+// Patterns to generate indexed loads with different forms of the address:
+// - frameindex,
+// - base + offset,
+// - base (without offset).
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ PatLeaf ImmPred, InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)),
+ (VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
+ def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
+ (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))),
+ (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
+}
+
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
+
+// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
+def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
+
//===----------------------------------------------------------------------===//
// Template class for load instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, hasSideEffects = 0,
-validSubTargets = HasV4SubT, addrMode = AbsoluteSet in
-class T_LD_abs_set<string mnemonic, RegisterClass RC>:
- LDInst2<(outs RC:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = "#mnemonic#"($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
+ hasSideEffects = 0 in
+class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
+ LDInst<(outs RC:$dst1, IntRegs:$dst2),
+ (ins u6Ext:$addr),
+ "$dst1 = "#mnemonic#"($dst2 = #$addr)",
+ []> {
+ bits<5> dst1;
+ bits<5> dst2;
+ bits<6> addr;
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{13-12} = 0b01;
+ let Inst{4-0} = dst1;
+ let Inst{20-16} = dst2;
+ let Inst{11-8} = addr{5-2};
+ let Inst{6-5} = addr{1-0};
+}
+
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>;
+ def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>;
+ def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>;
+ def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1 in
+ def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>;
+
+let accessSize = WordAccess in {
+ def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>;
+ def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>;
+}
-def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>;
-def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>;
-def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>;
-def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>;
-def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>;
-def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>;
+
+let accessSize = ByteAccess in
+ def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>;
+
+let accessSize = HalfWordAccess in
+def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>;
+
+// Load - Indirect with long offset
+let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
+opExtentBits = 6, opExtendable = 3 in
+class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<4> MajOp>
+ : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3),
+ "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
+ [] >, ImmRegShl {
+ bits<5> dst;
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ let CextOpcode = CextOp;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12} = 0b1;
+ let Inst{11-8} = src3{5-2};
+ let Inst{7} = src2{0};
+ let Inst{6-5} = src3{1-0};
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = ByteAccess in {
+ def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>;
+ def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>;
+ def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo",
+ DoubleRegs, 0b0100>;
+}
+
+let accessSize = HalfWordAccess in {
+ def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>;
+ def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>;
+ def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>;
+ def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo",
+ DoubleRegs, 0b0010>;
+}
+
+let accessSize = WordAccess in {
+ def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>;
+ def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>;
+ def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>;
+}
+
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
+
+
+multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tglobaladdr:$src2)))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tconstpool:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tconstpool:$src2)))),
+ (MI IntRegs:$src1, 0, tconstpool:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tjumptable:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tjumptable:$src2)))),
+ (MI IntRegs:$src1, 0, tjumptable:$src2)>;
+}
+
+let AddedComplexity = 60 in {
+defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
+defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
+
+defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
+defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
+
+defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
+defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
+}
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated load instructions with
@@ -430,217 +612,234 @@ multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
}
}
-let hasNewValue = 1, accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let hasNewValue = 1, accessSize = ByteAccess in {
defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
}
-let hasNewValue = 1, accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let hasNewValue = 1, accessSize = HalfWordAccess in {
defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
}
-let hasNewValue = 1, accessSize = WordAccess, isCodeGenOnly = 0 in
+let hasNewValue = 1, accessSize = WordAccess in
defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
// Selection patterns for load instructions with base + register offset and a
// non-zero immediate value; the immediate is used to left-shift the second
// register operand.
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
let AddedComplexity = 40 in {
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrb_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrub_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrub_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadruh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadruh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (load (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadri_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i64 (load (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrd_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
+ def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_pat<load, i64, L4_loadrd_rr>;
}
-
// Selection patterns for load instructions with base + register offset and a
// zero immediate value.
-let AddedComplexity = 10 in {
-def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrd_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
+}
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrb_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+// zext i1->i64
+def: Pat<(i64 (zext (i1 PredRegs:$src1))),
+ (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (Zext64 IntRegs:$src1)>;
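+// For the i1 case the predicate is first materialized as 0/1 with C2_muxii
+// and then widened; e.g. (i64)(x < y) becomes a compare, a mux to 0/1, and
+// a combine(#0, Rd). The i32 case needs only the combine.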
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
-def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for store instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 1, opExtentBits = 6,
+ addrMode = AbsoluteSet, isNVStorable = 1 in
+class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs IntRegs:$dst),
+ (ins u6Ext:$addr, RC:$src),
+ mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<5> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
-def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+ let IClass = 0b1010;
-def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = dst;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
-def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadri_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+let isNVStorable = 0 in {
+ def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs,
+ 0b011, HalfWordAccess, 1>;
+ def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs,
+ 0b110, DoubleWordAccess>;
}
-// zext i1->i64
-def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (A4_combineir 0, (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
- Requires<[HasV4T]>;
+let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2,
+isExtended = 1, opExtentBits = 6 in
+class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
+ MemAccessSize AccessSz >
+ : NVInst <(outs IntRegs:$dst),
+ (ins u6Ext:$addr, IntRegs:$src),
+ mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<3> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
-// zext i32->i64
-def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A4_combineir 0, (i32 IntRegs:$src1)))>,
- Requires<[HasV4T]>;
-// zext i8->i64
-def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let IClass = 0b1010;
-// zext i1->i64
-def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[HasV4T]>;
-
-// zext i16->i64
-def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (A4_combineir 0, (L2_loadruh_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadruh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[HasV4T]>;
-
-// anyext i16->i64
-def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadrh_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = dst;
+ let Inst{13} = 0b0;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
-// zext i32->i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
+let mayStore = 1, addrMode = AbsoluteSet in {
+ def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>;
+ def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>;
+ def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>;
+}
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
+addrMode = BaseLongOffset, AddedComplexity = 40 in
+class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
+ []>, ImmRegShl, NewValueRel {
+
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<5> src4;
-// anyext i32->i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
+ let accessSize = AccessSz;
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+ let IClass = 0b1010;
-let AddedComplexity = 100 in
-def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let Inst{27-24} = 0b1101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-8} = src4;
+ let Inst{7} = 0b1;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+}
+def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011,
+ HalfWordAccess, 1>;
+def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
+def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
+ DoubleWordAccess>;
+let AddedComplexity = 40 in
+multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u32ImmPred:$src3)),
+ (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-///
-//===----------------------------------------------------------------------===//
-// Template class for store instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT,
-addrMode = AbsoluteSet in
-class T_ST_abs_set<string mnemonic, RegisterClass RC>:
- STInst2<(outs IntRegs:$dst1),
- (ins RC:$src1, u0AlwaysExt:$src2),
- mnemonic#"($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
-def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>;
-def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>;
-def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>;
-def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>;
+defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
+defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
+defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
+defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
+
+let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
+ opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
+class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
+ MemAccessSize AccessSz>
+ : NVInst <(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<3> src4;
+
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1101101;
+ let Inst{7} = 0b1;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src4;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+ }
+
+def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>;
+def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>;
+def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated store instructions with
@@ -804,8 +1003,7 @@ multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0,
- isCodeGenOnly = 0 in {
+let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
@@ -825,83 +1023,18 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0,
defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
}
-let Predicates = [HasV4T], AddedComplexity = 10 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2,
- u2ImmPred:$src3))),
- (S4_storerb_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2,
- u2ImmPred:$src3))),
- (S4_storerh_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(store (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (S4_storeri_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(store (i64 DoubleRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (S4_storerd_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, DoubleRegs:$src4)>;
-}
-
-let isExtended = 1, opExtendable = 2 in
-class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> :
- STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4),
- mnemonic#"($src1<<#$src2+##$src3) = $src4",
- [(stOp (VT RC:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in
-class T_ST_LongOff_nv <string mnemonic> :
- NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- mnemonic#"($src1<<#$src2+##$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> {
- let BaseOpcode = BaseOp#"_shl" in {
- let isNVStorable = 1 in
- def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
-
- def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
- }
-}
-
-let AddedComplexity = 10, validSubTargets = HasV4SubT in {
- def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
- defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
- defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel;
- defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel;
-}
-
-let AddedComplexity = 40 in
-multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
- PatFrag stOp> {
- def : Pat<(stOp (VT RC:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
- (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))),
+ (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
- def : Pat<(stOp (VT RC:$src4),
- (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
- (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+let AddedComplexity = 40 in {
+ def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexs_pat<store, I32, S4_storeri_rr>;
+ def: Storexs_pat<store, I64, S4_storerd_rr>;
}
-defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
-defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
-defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
-defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
-
// memd(Rx++#s4:3)=Rtt
// memd(Rx++#s4:3:circ(Mu))=Rtt
// memd(Rx++I:circ(Mu))=Rtt
@@ -1004,8 +1137,8 @@ multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
}
}
-let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset,
- InputType = "imm", isCodeGenOnly = 0 in {
+let hasSideEffects = 0, addrMode = BaseImmOffset,
+ InputType = "imm" in {
let accessSize = ByteAccess in
defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
@@ -1016,22 +1149,49 @@ let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset,
defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
}
-let Predicates = [HasV4T], AddedComplexity = 10 in {
-def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
- (S4_storeirb_io IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+def IMM_BYTE : SDNodeXForm<imm, [{
+ // A negative value such as -1 is represented as 255 in the DAG;
+ // assigning it to a byte restores the desired signed value.
+ int8_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_HALF : SDNodeXForm<imm, [{
+ // A negative value such as -1 is represented as 65535 in the DAG;
+ // assigning it to a short restores the desired signed value.
+ int16_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+ // A negative value such as -1 can be represented as 4294967295. The DAG
+ // does not currently do this, but an optimization might convert -1 into a
+ // large positive number; assigning it to a word restores the desired
+ // signed value.
+ int32_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
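+// Why the re-signing matters, as a sketch: for "*(int8_t *)p = -1;" the DAG
+// carries the constant as 255; IMM_BYTE converts it back to the signed value
+// so the selected store-immediate encodes memb(Rs+#0) = #-1, which fits the
+// instruction's signed immediate field.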
-def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
- u6_1ImmPred:$src2)),
- (S4_storeirh_io IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
-def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
- (S4_storeiri_io IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+let AddedComplexity = 40 in {
+ // Frame-index patterns are deliberately not used for these stores: the
+ // offset is not extendable, so when frame indices are eliminated the
+ // offset relative to R29/R30 may not fit in the u6 field.
+ def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
+ S4_storeirb_io>;
+ def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf,
+ S4_storeirh_io>;
+ def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord,
+ S4_storeiri_io>;
}
-let AddedComplexity = 6 in
-def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeirb_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
+def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
@@ -1039,16 +1199,10 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// memb(Rx++Mu:brev)=Rt
// memb(gp+#u16:0)=Rt
-
// Store halfword.
// TODO: needs to be implemented
// memh(Re=#U6)=Rt.H
// memh(Rs+#s11:1)=Rt.H
-let AddedComplexity = 6 in
-def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeirh_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
-
// memh(Rs+Ru<<#u2)=Rt.H
// TODO: needs to be implemented.
@@ -1065,7 +1219,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// if ([!]Pv[.new]) memh(#u6)=Rt.H
// if ([!]Pv[.new]) memh(#u6)=Rt
-
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
// TODO: needs to be implemented.
@@ -1075,20 +1228,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// Store word.
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
-
-// Store predicate:
-let hasSideEffects = 0 in
-def STriw_pred_V4 : STInst2<(outs),
- (ins MEMri:$addr, PredRegs:$src1),
- "Error; should not emit",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 6 in
-def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeiri_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2)=Rt
// memw(Rx++#s4:2:circ(Mu))=Rt
// memw(Rx++I:circ(Mu))=Rt
@@ -1203,7 +1342,7 @@ multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
+let addrMode = BaseImmOffset, InputType = "imm" in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
u6_0Ext, 0b00>, AddrModeRel;
@@ -1218,11 +1357,45 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
}
//===----------------------------------------------------------------------===//
+// Post increment loads with register offset.
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1 in
+def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>;
+
+def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>;
+
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz>
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3),
+ "$dst = "#mnemonic#"($src2++$src3)", [],
+ "$src1 = $dst, $src2 = $_dst_"> {
+ bits<5> dst;
+ bits<5> src2;
+ bits<1> src3;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = src3;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>;
+
+//===----------------------------------------------------------------------===//
// Template class for non-predicated post increment .new stores
// mem[bhwd](Rx++#s4:[0123])=Nt.new
//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
- addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 3 in
+let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 3 in
class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
: NVInstPI_V4 <(outs IntRegs:$_dst_),
(ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
@@ -1254,8 +1427,8 @@ class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
// Template class for predicated post increment .new stores
// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new
//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
- addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 4 in
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 4 in
class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
bits<2> MajOp, bit isPredNot, bit isPredNew >
: NVInstPI_V4 <(outs IntRegs:$_dst_),
@@ -1310,13 +1483,13 @@ multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
//===----------------------------------------------------------------------===//
@@ -1343,15 +1516,12 @@ class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
let Inst{7} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
-}
// memb(Rx++#s4:0:circ(Mu))=Nt.new
// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
@@ -1401,7 +1571,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
let IClass = 0b0010;
- let Inst{26} = 0b0;
+ let Inst{27-26} = 0b00;
let Inst{25-23} = majOp;
let Inst{22} = isNegCond;
let Inst{18-16} = Ns;
@@ -1415,9 +1585,9 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
+ def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
+ def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
}
// NvOpNum = 0 -> First Operand is a new-value Register
@@ -1426,8 +1596,8 @@ multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
bit NvOpNum> {
let BaseOpcode = BaseOp#_NVJ in {
- defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
- defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
+ defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
+ defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
}
}
@@ -1438,13 +1608,12 @@ multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in {
- defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
- defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
- defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
- defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
- defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
+ defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
+ defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
+ defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
+ defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -1482,15 +1651,15 @@ class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+ def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>;
+ def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
}
multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
let BaseOpcode = BaseOp#_NVJri in {
- defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
- defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+ defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+ defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
}
}
@@ -1499,11 +1668,10 @@ multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in {
- defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
- defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
- defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
+ defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
+ defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -1540,16 +1708,16 @@ class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+ def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
+ def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
}
multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
string ImmVal> {
let BaseOpcode = BaseOp#_NVJ_ConstImm in {
- defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
- defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
+ defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+ defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
}
}
@@ -1558,14 +1726,14 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
- Defs = [PC], hasSideEffects = 0, isCodeGenOnly = 0 in {
- defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
- defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
- defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+ defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+ defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
}
// J4_hintjumpr: Hint indirect conditional jump.
-let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
def J4_hintjumpr: JRInst <
(outs),
(ins IntRegs:$Rs),
@@ -1586,8 +1754,7 @@ def J4_hintjumpr: JRInst <
// PC-relative add
let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0,
- Uses = [PC], validSubTargets = HasV4SubT in
+ isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6),
"$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
bits<5> Rd;
@@ -1625,7 +1792,6 @@ class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
@@ -1634,7 +1800,69 @@ def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
-}
+
+// op(Ps, op(Pt, Pu))
+class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+// op(Ps, op(Pt, ~Pu))
+class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+def: LogLog_pat<and, and, C4_and_and>;
+def: LogLog_pat<and, or, C4_and_or>;
+def: LogLog_pat<or, and, C4_or_and>;
+def: LogLog_pat<or, or, C4_or_or>;
+
+def: LogLogNot_pat<and, and, C4_and_andn>;
+def: LogLogNot_pat<and, or, C4_and_orn>;
+def: LogLogNot_pat<or, and, C4_or_andn>;
+def: LogLogNot_pat<or, or, C4_or_orn>;
+
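+// As a minimal illustration (an assumed example, not from a regression test),
+// IR of the form
+//   %t = or i1 %p1, %p2
+//   %r = and i1 %p0, %t
+// matches LogLog_pat<and, or, C4_and_or> and selects to
+//   p0 = and(p0, or(p1, p2))
+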
+//===----------------------------------------------------------------------===//
+// PIC: Support for position-independent code. The patterns and SD nodes
+// defined below are needed for PIC code generation.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonPICAdd
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_HexagonGOTAdd
+ : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+
+def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
+def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
+def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternal>;
+def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalJT>;
+def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalBA>;
+
+// PIC: Map from a block address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from the computation to generate a GOT pointer to a PC-relative add
+def: Pat<(Hexagonpic_add texternalsym:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a jump table address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a GOT-relative symbol reference to a load
+def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
+ (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
+
+// PIC: Map from a static symbol reference to a PC-relative add
+def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
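+
+// As a rough sketch (assumed instruction sequences, not taken from a test),
+// the patterns above yield code along the lines of:
+//   r14 = add(pc, ##_GLOBAL_OFFSET_TABLE_@PCREL)   // GOT pointer
+//   r0  = memw(r14 + ##var@GOT)                    // global via the GOT
+//   r1  = add(pc, ##sym@PCREL)                     // static/local symbol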
//===----------------------------------------------------------------------===//
// CR -
@@ -1645,12 +1873,15 @@ def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
//===----------------------------------------------------------------------===//
// Logical with-not instructions.
-let validSubTargets = HasV4SubT, isCodeGenOnly = 0 in {
- def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
- def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-}
+def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
+def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
+
+def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
bits<5> Rd;
@@ -1663,15 +1894,16 @@ def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
+
// Add and accumulate.
// Rd=add(Rs,add(Ru,#s6))
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
- opExtendable = 3, isCodeGenOnly = 0 in
+ opExtendable = 3 in
def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1690,7 +1922,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
}
let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 6, opExtendable = 2, isCodeGenOnly = 0 in
+ opExtentBits = 6, opExtendable = 2 in
def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru),
"$Rd = add($Rs, sub(#$s6, $Ru))",
@@ -1710,31 +1942,64 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
let Inst{7-5} = s6{2-0};
let Inst{4-0} = Ru;
}
-
+
+// Rd=add(Rs,sub(#s6,Ru))
+def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
+ (i32 IntRegs:$src3))),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=sub(add(Rs,#s6),Ru)
+def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
+ (i32 IntRegs:$src3)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=add(sub(Rs,Ru),#s6)
+def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
+ (s32ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
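+
+// For instance (an assumed example, not from a test), all of
+//   x + (5 - y),  (x + 5) - y,  (x - y) + 5
+// canonicalize onto the same instruction:
+//   r0 = add(r0, sub(#5, r1))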
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
// Extract bitfield
// Rdd=extract(Rss,#u6,#U6)
// Rdd=extract(Rss,Rtt)
// Rd=extract(Rs,Rtt)
// Rd=extract(Rs,#u5,#U5)
-let isCodeGenOnly = 0 in {
def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>;
-}
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>;
}
-let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+// Complex add/sub halfwords/words
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>;
+ def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>;
+ def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>;
+ def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>;
+}
+
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>;
+ def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>;
+}
+
+let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in {
def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
}
// Logical xor with xor accumulation.
// Rxx^=xor(Rss,Rtt)
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def M4_xor_xacc
: SInst <(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -1749,36 +2014,102 @@ def M4_xor_xacc
let IClass = 0b1100;
- let Inst{27-23} = 0b10101;
+ let Inst{27-22} = 0b101010;
let Inst{20-16} = Rss;
let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b000;
let Inst{4-0} = Rxx;
}
-
+
+// Rotate and reduce bytes
+// Rdd=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rdd = vrcrotate($Rss, $Rt, #$u2)",
+ [], "", S_3op_tc_3x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b001111;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = 0b11;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rdd;
+ }
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate_acc
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rxx;
+ }
+
+// Vector reduce conditional negate halfwords
+let hasSideEffects = 0 in
+def S2_vrcnegh
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rxx += vrcnegh($Rss, $Rt)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b111;
+ let Inst{4-0} = Rxx;
+ }
+
// Split bitfield
-let isCodeGenOnly = 0 in
def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
// Arithmetic/Convergent round
-let isCodeGenOnly = 0 in
def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
-let isCodeGenOnly = 0 in
def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
-let Defs = [USR_OVF], isCodeGenOnly = 0 in
+let Defs = [USR_OVF] in
def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
// Logical-logical words.
// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
- opExtendable = 3, isCodeGenOnly = 0 in
+ opExtendable = 3 in
def S4_or_andix:
ALU64Inst<(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
"$Rx = or($Ru, and($_src_, #$s10))" ,
[(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] ,
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -1795,7 +2126,7 @@ def S4_or_andix:
// Miscellaneous ALU64 instructions.
//
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
bits<5> Rd;
@@ -1810,7 +2141,7 @@ def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
@@ -1826,7 +2157,54 @@ def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
+let hasSideEffects = 0 in
+def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0100;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rt;
+ let Inst{12-8} = Rs;
+ let Inst{7} = 0b1;
+ let Inst{4-0} = Rd;
+}
+
// Rx[&|]=xor(Rs,Rt)
def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
@@ -1849,7 +2227,24 @@ def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-}
+
+def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
+def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
+def: T_MType_acc_pat2 <M4_or_and, and, or>;
+def: T_MType_acc_pat2 <M4_and_and, and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
+ (not IntRegs:$src3)))),
+ (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
// Compound or-or and or-and
let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
@@ -1859,7 +2254,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
(ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
"$Rx |= "#mnemonic#"($Rs, #$s10)",
[(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))],
+ (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -1875,10 +2270,10 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
let Inst{4-0} = Rx;
}
-let CextOpcode = "ORr_ANDr", isCodeGenOnly = 0 in
+let CextOpcode = "ORr_ANDr" in
def S4_or_andi : T_CompOR <"and", 0b00, and>;
-let CextOpcode = "ORr_ORr", isCodeGenOnly = 0 in
+let CextOpcode = "ORr_ORr" in
def S4_or_ori : T_CompOR <"or", 0b10, or>;
// Modulo wrap
@@ -1923,22 +2318,33 @@ def S4_or_ori : T_CompOR <"or", 0b10, or>;
//===----------------------------------------------------------------------===//
// Bit reverse
-let isCodeGenOnly = 0 in
def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
// Bit count
-let isCodeGenOnly = 0 in {
def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-}
-def: Pat<(i32 (trunc (cttz (i64 DoubleRegs:$Rss)))),
- (S2_ct0p (i64 DoubleRegs:$Rss))>;
-def: Pat<(i32 (trunc (cttz (not (i64 DoubleRegs:$Rss))))),
- (S2_ct1p (i64 DoubleRegs:$Rss))>;
+// Count trailing zeros: 64-bit.
+def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>;
+
+// Count trailing ones: 64-bit.
+def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+
+// Define leading/trailing patterns that require zero-extensions to 64 bits.
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
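+// E.g. (assumed, and assuming Zext64 expands to a combine with #0) a 64-bit
+// cttz selects to "r0 = ct0(r1:0)" followed by "r1:0 = combine(#0, r0)",
+// since the count instructions produce a 32-bit result.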
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+
+let hasSideEffects = 0, hasNewValue = 1 in
def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
@@ -1953,7 +2359,7 @@ def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
@@ -1970,10 +2376,8 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
// Bit test/set/clear
-let isCodeGenOnly = 0 in {
def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-}
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
@@ -1993,11 +2397,9 @@ let AddedComplexity = 100 in
def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
(S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
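
// E.g. (an assumed example) "(x & 0x20) == 0" matches the pattern above and
// selects to "p0 = !tstbit(r0, #5)".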
-let isCodeGenOnly = 0 in {
def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-}
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
@@ -2022,14 +2424,13 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed.
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1,
- isCodeGenOnly = 0 in
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6),
"$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
[(set (i32 IntRegs:$Rd),
(add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
- u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2049,12 +2450,12 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
// Rd=add(#u6,mpyi(Rs,Rt))
let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
- isExtendable = 1, opExtentBits = 6, opExtendable = 1, isCodeGenOnly = 0 in
+ isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
[(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))],
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
"", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
@@ -2099,18 +2500,16 @@ class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
let Inst{4-0} = src1;
}
-let isCodeGenOnly = 0 in
def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
(ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
- CextOpcode = "ADD_MPY", InputType = "imm", isCodeGenOnly = 0 in
-def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred,
+ CextOpcode = "ADD_MPY", InputType = "imm" in
+def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
(ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
-let validSubTargets = HasV4SubT, CextOpcode = "ADD_MPY", InputType = "reg",
- hasNewValue = 1, isCodeGenOnly = 0 in
+let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
"$Rx = add($Ru, mpyi($_src_, $Rs))",
@@ -2129,51 +2528,101 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
let Inst{20-16} = Rs;
}
-// Rd=add(##,mpyi(Rs,#U6))
-def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2,
- u6ImmPred:$src3))>;
-// Rd=add(##,mpyi(Rs,Rt))
-def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2,
- IntRegs:$src3))>;
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
+def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-// Rxx^=pmpyw(Rs,Rt)
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
+def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
-// Vector reduce multiply word by signed half (32x16)
-// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
-// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
-
-// Multiply and use upper result
-// Rd=mpy(Rs,Rt.H):<<1:sat
-// Rd=mpy(Rs,Rt.L):<<1:sat
-// Rd=mpy(Rs,Rt):<<1
-// Rd=mpy(Rs,Rt):<<1:sat
-// Rd=mpysu(Rs,Rt)
-// Rx+=mpy(Rs,Rt):<<1:sat
-// Rx-=mpy(Rs,Rt):<<1:sat
-
-// Vector multiply bytes
-// Rdd=vmpybsu(Rs,Rt)
-// Rdd=vmpybu(Rs,Rt)
-// Rxx+=vmpybsu(Rs,Rt)
-// Rxx+=vmpybu(Rs,Rt)
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
+def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
+def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
+def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
+def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
// Vector polynomial multiply halfwords
// Rdd=vpmpyh(Rs,Rt)
+def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
+
// Rxx^=vpmpyh(Rs,Rt)
+def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>;
+
+// Rxx^=pmpyw(Rs,Rt)
+def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>;
//===----------------------------------------------------------------------===//
// XTYPE/MPY -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// ALU64/Vector compare
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for vector compare
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in
+class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, ImmOprnd:$Imm),
+ "$Pd = "#Str#"($Rss, #$Imm)",
+ [], "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<32> Imm;
+ bits<8> ImmBits;
+ let ImmBits{6-0} = Imm{6-0};
+ let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{22-21} = cmpOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-5} = ImmBits;
+ let Inst{4-3} = minOp;
+ let Inst{1-0} = Pd;
+ }
+
+// Vector compare bytes
+def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
+def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
+
+let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
+def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.eq", 0b1000>;
+
+def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>;
+def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>;
+def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>;
+
+// Vector compare halfwords
+def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>;
+def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>;
+def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>;
+
+// Vector compare words
+def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>;
+def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>;
+def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>;
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT +
@@ -2184,13 +2633,13 @@ def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
- hasNewValue = 1, opNewValue = 0, validSubTargets = HasV4SubT in
+ hasNewValue = 1, opNewValue = 0 in
class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
bit asl_lsr, bits<2> MajOp, InstrItinClass Itin>
: MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
"$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
[(set (i32 IntRegs:$Rd),
- (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))],
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
"$Rd = $Rx", Itin> {
bits<5> Rd;
@@ -2216,29 +2665,48 @@ multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp,
def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>;
}
-let AddedComplexity = 200, isCodeGenOnly = 0 in {
+let AddedComplexity = 200 in {
defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>;
defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>;
}
-let AddedComplexity = 30, isCodeGenOnly = 0 in
+let AddedComplexity = 30 in
defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>;
-let isCodeGenOnly = 0 in
defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>;
+let AddedComplexity = 200 in {
+ def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+}
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>;
// Rd=[cround|round](Rs,Rt)
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in {
def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
}
// Rd=round(Rs,Rt):sat
-let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23,
- isCodeGenOnly = 0 in
+let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
+// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>;
+ def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>;
+}
+
// Rdd=[add|sub](Rss,Rtt,Px):carry
let isPredicateLate = 1, hasSideEffects = 0 in
class T_S3op_carry <string mnemonic, bits<3> MajOp>
@@ -2261,13 +2729,51 @@ class T_S3op_carry <string mnemonic, bits<3> MajOp>
let Inst{4-0} = Rdd;
}
-let isCodeGenOnly = 0 in {
def A4_addp_c : T_S3op_carry < "add", 0b110 >;
def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
-}
+
+let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in
+class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned>
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru),
+ "$Rxx = "#mnemonic#"($Rss, $Ru)" ,
+ [] , "$dst2 = $Rxx"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Ru;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = isUnsigned;
+ let Inst{12-8} = Rxx;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Ru;
+ }
+
+// Vector reduce maximum halfwords
+// Rxx=vrmax[u]h(Rss,Ru)
+def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>;
+def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>;
+
+// Vector reduce maximum words
+// Rxx=vrmax[u]w(Rss,Ru)
+def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>;
+def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>;
+
+// Vector reduce minimum halfwords
+// Rxx=vrmin[u]h(Rss,Ru)
+def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>;
+def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>;
+
+// Vector reduce minimum words
+// Rxx=vrmin[u]w(Rss,Ru)
+def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>;
+def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
// Shift an immediate left by register amount.
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
"$Rd = lsl(#$s6, $Rt)" ,
[(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6,
@@ -2299,7 +2805,7 @@ def MEMOPIMM : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int32_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
def MEMOPIMM_HALF : SDNodeXForm<imm, [{
@@ -2308,7 +2814,7 @@ def MEMOPIMM_HALF : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int16_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
@@ -2317,14 +2823,14 @@ def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int8_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM : SDNodeXForm<imm, [{
// Return the bit position we will set [0-31].
// As an SDNode.
int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM : SDNodeXForm<imm, [{
@@ -2332,14 +2838,14 @@ def CLRMEMIMM : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int32_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
// Return the bit position we will set [0-15].
// As an SDNode.
int16_t imm = N->getSExtValue();
- return XformMskToBitPosU4Imm(imm);
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
@@ -2347,14 +2853,14 @@ def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int16_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU4Imm(imm);
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
// Return the bit position we will set [0-7].
// As an SDNode.
int8_t imm = N->getSExtValue();
- return XformMskToBitPosU3Imm(imm);
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
@@ -2362,7 +2868,7 @@ def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int8_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU3Imm(imm);
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
}]>;
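
// For reference, a sketch of what the Xform helpers above are assumed to
// compute (paraphrased from HexagonISelDAGToDAG.cpp, not verbatim):
//   XformM5ToU5Imm(imm, dl)        -> getTargetConstant(-imm, dl, MVT::i32)
//   XformMskToBitPosU5Imm(imm, dl) -> getTargetConstant(Log2_32(imm), dl,
//                                                       MVT::i32)
// i.e. a negative addend in [-31,-1] becomes the positive operand of a
// subtracting memop, and a single-bit mask becomes its bit position.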
//===----------------------------------------------------------------------===//
@@ -2450,15 +2956,14 @@ multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
}
// Define MemOp instructions.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
- validSubTargets =HasV4SubT in {
- let opExtentBits = 6, accessSize = ByteAccess, isCodeGenOnly = 0 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
+ let opExtentBits = 6, accessSize = ByteAccess in
defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
- let opExtentBits = 7, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+ let opExtentBits = 7, accessSize = HalfWordAccess in
defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
- let opExtentBits = 8, accessSize = WordAccess, isCodeGenOnly = 0 in
+ let opExtentBits = 8, accessSize = WordAccess in
defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
}
@@ -2469,43 +2974,43 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 180 in
- def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
- IntRegs:$addr),
- (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, u5ImmPred:$addend)>;
let AddedComplexity = 190 in
- def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
- u5ImmPred:$addend),
- (add IntRegs:$base, ExtPred:$offset)),
- (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>;
}
-multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon addMI, InstHexagon subMI> {
- defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
- defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>;
}
multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
- L4_iadd_memoph_io, L4_isub_memoph_io>;
+ defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_iadd_memoph_io, L4_isub_memoph_io>;
// Byte
- defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
- L4_iadd_memopb_io, L4_isub_memopb_io>;
+ defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_iadd_memopb_io, L4_isub_memopb_io>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
- defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io,
- L4_isub_memopw_io>;
+ defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io,
+ L4_isub_memopw_io>;
}
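
// A minimal illustration (assumed, not from a regression test): with memops
// enabled,
//   void f(int *p) { *p += 5; }
// selects through the patterns above to a single
//   memw(r0+#0) += #5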
//===----------------------------------------------------------------------===//
@@ -2515,38 +3020,37 @@ let Predicates = [HasV4T, UseMEMOP] in {
// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
- PatLeaf immPred, ComplexPattern addrPred,
- SDNodeXForm xformFunc, InstHexagon MI> {
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
+ PatLeaf immPred, SDNodeXForm xformFunc,
+ InstHexagon MI> {
let AddedComplexity = 190 in
- def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
- IntRegs:$addr),
- (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
+ def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>;
let AddedComplexity = 195 in
- def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
- immPred:$subend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+ def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>;
}
multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
- ADDRriU6_1, MEMOPIMM_HALF, L4_isub_memoph_io>;
+ defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred,
+ MEMOPIMM_HALF, L4_isub_memoph_io>;
// Byte
- defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
- ADDRriU6_0, MEMOPIMM_BYTE, L4_isub_memopb_io>;
+ defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred,
+ MEMOPIMM_BYTE, L4_isub_memopb_io>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
- defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
- ADDRriU6_2, MEMOPIMM, L4_isub_memopw_io>;
+ defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred,
+ MEMOPIMM, L4_isub_memopw_io>;
}
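
// Likewise (assumed), a negative addend selects the subtracting form:
//   *p += -3   ->   memw(r0+#0) -= #3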
//===----------------------------------------------------------------------===//
@@ -2556,52 +3060,50 @@ let Predicates = [HasV4T, UseMEMOP] in {
//===----------------------------------------------------------------------===//
multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
- PatLeaf extPred, ComplexPattern addrPred,
- SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+ PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI,
+ SDNode OpNode> {
// mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
let AddedComplexity = 250 in
- def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
- immPred:$bitend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
let AddedComplexity = 225 in
- def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
- immPred:$bitend),
- (addrPred (i32 IntRegs:$addr), extPred:$offset)),
- (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>;
}
-multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
// Byte - clrbit
- defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
- ADDRriU6_0, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred,
+ CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
// Byte - setbit
- defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
- ADDRriU6_0, SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred,
+ SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
// Half Word - clrbit
- defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
- ADDRriU6_1, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred,
+ CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
// Half Word - setbit
- defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
- ADDRriU6_1, SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred,
+ SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
+let Predicates = [UseMEMOP] in {
// mem[bh](Rs+#0) = [clrbit|setbit](#U5)
// mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
- defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+ defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
// memw(Rs+#0) = [clrbit|setbit](#U5)
// memw(Rs+#u6:2) = [clrbit|setbit](#U5)
- defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
- CLRMEMIMM, L4_iand_memopw_io, and>;
- defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
- SETMEMIMM, L4_ior_memopw_io, or>;
+ defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM,
+ L4_iand_memopw_io, and>;
+ defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM,
+ L4_ior_memopw_io, or>;
}
//===----------------------------------------------------------------------===//
@@ -2611,54 +3113,51 @@ let Predicates = [HasV4T, UseMEMOP] in {
// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
//===----------------------------------------------------------------------===//
-multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
- PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 141 in
// mem[bhw](Rs+#0) [+-&|]= Rt
- def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
- (i32 IntRegs:$addend)),
- (addrPred (i32 IntRegs:$addr), extPred:$offset)),
- (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>;
// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
let AddedComplexity = 150 in
- def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
- (i32 IntRegs:$orend)),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>;
}
-multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
- ComplexPattern addrPred, PatLeaf extPred,
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
InstHexagon addMI, InstHexagon subMI,
- InstHexagon andMI, InstHexagon orMI > {
-
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+ InstHexagon andMI, InstHexagon orMI> {
+ defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>;
}
multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
- L4_add_memoph_io, L4_sub_memoph_io,
- L4_and_memoph_io, L4_or_memoph_io>;
+ defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_add_memoph_io, L4_sub_memoph_io,
+ L4_and_memoph_io, L4_or_memoph_io>;
// Byte
- defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
- L4_add_memopb_io, L4_sub_memopb_io,
- L4_and_memopb_io, L4_or_memopb_io>;
+ defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_add_memopb_io, L4_sub_memopb_io,
+ L4_and_memopb_io, L4_or_memopb_io>;
}
// Define 'def Pats' for MemOps with register addend.
-let Predicates = [HasV4T, UseMEMOP] in {
+let Predicates = [UseMEMOP] in {
// Byte, Half Word
- defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, L4_add_memopw_io,
- L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io >;
+ defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io,
+ L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -2676,311 +3175,41 @@ let Predicates = [HasV4T, UseMEMOP] in {
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
-let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0,
- validSubTargets = HasV4SubT in
-class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp,
- list<dag> Pattern>
- : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2),
- "$dst = !cmp."#OpName#"($src1, #$src2)",
- Pattern,
- "", ALU32_2op_tc_2early_SLOT0123> {
- bits<2> dst;
- bits<5> src1;
- bits<10> src2;
+// p=!cmp.eq(r1,#s10); p=!cmp.gt(r1,#s10); p=!cmp.gtu(r1,#u9)
+def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
+def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
+def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
- let IClass = 0b0111;
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = op;
- let Inst{20-16} = src1;
- let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9});
- let Inst{13-5} = src2{8-0};
- let Inst{4-2} = 0b100;
- let Inst{1-0} = dst;
-}
-
-let opExtentBits = 10, isExtentSigned = 1 in {
-def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>;
-
-def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>;
-
-}
-let opExtentBits = 9 in
-def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>;
-
-
-
-// p=!cmp.eq(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
- Requires<[HasV4T]>;
-
-// p=!cmp.gt(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
- Requires<[HasV4T]>;
-
-
-// p=!cmp.gtu(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
- Requires<[HasV4T]>;
-
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u8Imm:$src2),
- "$dst = cmpb.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
- bb:$offset)>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (xor (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)), 255), 0))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (shl (i32 IntRegs:$src1), (i32 24)),
- (shl (i32 IntRegs:$src2), (i32 24))))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gt(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 24)),
- (shl (i32 IntRegs:$src2), (i32 24))))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gtu(Rs,#u7)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
-isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
-def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Ext:$src2),
- "$dst = cmpb.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- u7ExtPred:$src2))]>,
- Requires<[HasV4T]>, ImmRegRel;
+def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
+def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
+
+// rs <= rt -> !(rs > rt).
+/*
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
+*/
+// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
+
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
// SDNode for converting immediate C to C-1.
def DEC_CONST_BYTE : SDNodeXForm<imm, [{
// Return the byte immediate const-1 as an SDNode.
int32_t imm = N->getSExtValue();
- return XformU7ToU7M1Imm(imm);
+ return XformU7ToU7M1Imm(imm, SDLoc(N));
}]>;
// For the sequence
-// zext( seteq ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setne ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( seteq (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setne (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 254), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setult ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setlt ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// This pattern interferes with coremark performance; it is not implemented
-// at this time.
-// For the sequence
-// zext( setgt ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-
-// For the sequence
-// zext( setuge ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setge ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setule ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setle ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
// zext( setult ( and(Rs, 255), u8))
// Use the isdigit transformation below
-// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)'
+// Generate code of the form 'C2_muxii(A4_cmpbgtui(Rs, C-1), 0, 1)'
// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;
// The isdigit transformation relies on two 'clever' aspects:
// 1) The data type is unsigned, which allows us to eliminate a zero test
//    after biasing the expression by 48.
@@ -2993,130 +3222,11 @@ def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// The code is transformed upstream of llvm into
// retval = (c-48) < 10 ? 1 : 0;
let AddedComplexity = 139 in
-def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
- u7StrictPosImmPred:$src2)))),
- (i32 (C2_muxii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
- (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gtu(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
-InputType = "reg" in
-def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- (and (i32 IntRegs:$src2), 255)))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-// The following instruction is not extended, as extension produces incorrect
-// code for negative numbers.
-
-// Signed half compare(.eq) ri.
-// Pd=cmph.eq(Rs,#s8)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
- "$dst = cmph.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
- s8ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-// Signed half compare(.eq) rr.
-// Case 1: xor + and, then compare:
-// r0=xor(r0,r1)
-// r0=and(r0,#0xffff)
-// p0=cmp.eq(r0,#0)
-// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)),
- 65535), 0))]>,
- Requires<[HasV4T]>;
-
-// Signed half compare(.eq) rr.
-// Case 2: shift left 16 bits then compare:
-// r0=asl(r0,16)
-// r1=asl(r1,16)
-// p0=cmp.eq(r0,r1)
-// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (shl (i32 IntRegs:$src1), (i32 16)),
- (shl (i32 IntRegs:$src2), (i32 16))))]>,
- Requires<[HasV4T]>;
-
-/* Incorrect Pattern -- immediate should be right shifted before being
-used in the cmph.gt instruction.
-// Signed half compare(.gt) ri.
-// Pd=cmph.gt(Rs,#s8)
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
-isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Ext:$src2),
- "$dst = cmph.gt($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 16)),
- s8ExtPred:$src2))]>,
- Requires<[HasV4T]>;
-*/
-
-// Signed half compare(.gt) rr.
-// Pd=cmph.gt(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 16)),
- (shl (i32 IntRegs:$src2), (i32 16))))]>,
- Requires<[HasV4T]>;
-
-// Unsigned half compare rr (.gtu).
-// Pd=cmph.gtu(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
-InputType = "reg" in
-def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setugt (and (i32 IntRegs:$src1), 65535),
- (and (i32 IntRegs:$src2), 65535)))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-// Unsigned half compare ri (.gtu).
-// Pd=cmph.gtu(Rs,#u7)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
-isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
-InputType = "imm" in
-def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Ext:$src2),
- "$dst = cmph.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
- u7ExtPred:$src2))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-let validSubTargets = HasV4SubT in
-def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
- Requires<[HasV4T]>;
-
-let validSubTargets = HasV4SubT in
-def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = !tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
- Requires<[HasV4T]>;
+def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (C2_muxii (A4_cmpbgtui IntRegs:$src1,
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2)),
+ 0, 1)>;
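+// A worked instance: for c = '7' (55), (unsigned)(55 - 48) = 7 and 7 < 10
+// yields 1; for c = '/' (47), 47 - 48 wraps to 0xFFFFFFFF, which is not
+// < 10, yielding 0 -- one unsigned compare covers both range checks.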
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -3173,40 +3283,23 @@ multiclass LD_MISC_L4_RETURN<string mnemonic> {
}
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0,
- validSubTargets = HasV4SubT, isCodeGenOnly = 0 in
+ Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in
defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
// Restore registers and dealloc return function call.
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC] in {
-let validSubTargets = HasV4SubT in
- def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "jump $dst",
- []>,
- Requires<[HasV4T]>;
+ Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
-let isCall = 1, isBarrier = 1,
- Defs = [R29, R30, R31, PC] in {
-let validSubTargets = HasV4SubT in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst",
- []>,
- Requires<[HasV4T]>;
+let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
}
// Save registers function call.
-let isCall = 1, isBarrier = 1,
- Uses = [R29, R31] in {
- def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst // Save_calle_saved_registers",
- []>,
- Requires<[HasV4T]>;
+let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -3278,7 +3371,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3295,7 +3388,7 @@ class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
//===----------------------------------------------------------------------===//
// Multiclass for store instructions with absolute addressing.
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+let addrMode = Absolute, isExtended = 1 in
multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
@@ -3319,7 +3412,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
isNewValue = 1, opNewValue = 1 in
class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
- : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src),
+ : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src),
mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
[], "", V2LDST_tc_st_SLOT0> {
bits<19> addr;
@@ -3397,7 +3490,7 @@ class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
//===----------------------------------------------------------------------===//
// Multiclass for new-value store instructions with absolute addressing.
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+let addrMode = Absolute, isExtended = 1 in
multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
@@ -3417,22 +3510,22 @@ multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
//===----------------------------------------------------------------------===//
// Stores with absolute addressing
//===----------------------------------------------------------------------===//
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
-let isNVStorable = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let isNVStorable = 0, accessSize = DoubleWordAccess in
defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
-let isNVStorable = 0, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let isNVStorable = 0, accessSize = HalfWordAccess in
defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
//===----------------------------------------------------------------------===//
@@ -3442,7 +3535,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT in
+let isAsmParserOnly = 1 in
class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
: T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
@@ -3452,7 +3545,7 @@ class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
let BaseOpcode = BaseOp#_abs;
}
-let validSubTargets = HasV4SubT in
+let isAsmParserOnly = 1 in
multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
bits<2> MajOp, bit isHalf = 0> {
// Set BaseOpcode same as absolute addressing instructions so that
@@ -3483,77 +3576,44 @@ let isNVStorable = 0, accessSize = HalfWordAccess in
def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
u16_1Imm, 0b01, 1>, PredNewRel;
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerbabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerhabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storeriabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(store (i64 DoubleRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerdabs tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
+
+class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
+
+class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
+
+class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr),
+ (MI Addr:$addr, (ValueMod Value:$val))>;
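+// For instance, Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp> below
+// expands to the plain pattern
+//   def: Pat<(truncstorei8 I32:$val, addrgp:$addr),
+//            (S2_storerbgp addrgp:$addr, I32:$val)>;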
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+
+ // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+ // to "r0 = 1; memw(#foo) = r0"
+ let AddedComplexity = 100 in
+ def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
}
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-// to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
//===----------------------------------------------------------------------===//
// Template class for non predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT in
+let isPredicable = 1, hasSideEffects = 0 in
class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp, Operand AddrOp, bit isAbs>
: LDInst <(outs RC:$dst), (ins AddrOp:$addr),
@@ -3582,7 +3642,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3595,11 +3655,12 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u16_1Imm"), 1,
/* u16_0Imm */ 0)));
}
+
//===----------------------------------------------------------------------===//
// Template class for predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasNewValue = 1, opExtentBits = 6, opExtendable = 2 in
+let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in
class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isPredNot, bit isPredNew>
: LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr),
@@ -3611,6 +3672,7 @@ class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
let isPredicatedNew = isPredNew;
let isPredicatedFalse = isPredNot;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let IClass = 0b1001;
@@ -3649,20 +3711,20 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess, hasNewValue = 1 in {
defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
}
-let accessSize = HalfWordAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
}
-let accessSize = WordAccess, hasNewValue = 1, isCodeGenOnly = 0 in
+let accessSize = WordAccess, hasNewValue = 1 in
defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
//===----------------------------------------------------------------------===//
@@ -3672,6 +3734,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
//===----------------------------------------------------------------------===//
+let isAsmParserOnly = 1 in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
: T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel {
@@ -3694,439 +3757,175 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
let accessSize = DoubleWordAccess in
def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadri_abs tglobaladdr: $absaddr)>;
-
-def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrb_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrub_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrh_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadruh_abs tglobaladdr:$absaddr)>;
-}
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (L2_loadrdgp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadrigp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadruhgp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (L2_loadrdgp tglobaladdr:$global))>;
+def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
+def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>;
+def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
+
+def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>;
+def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
+
+// Map from load(globaladdress) -> mem[u][bhwd](#foo)
+class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+ : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
+ (VT (MI tglobaladdr:$global))>;
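+// e.g. LoadGP_pats<zextloadi8, L2_loadrubgp> instantiates to
+//   def: Pat<(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+//            (i32 (L2_loadrubgp tglobaladdr:$global))>;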
+
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <extloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <load, L2_loadrigp>;
+ def: LoadGP_pats <load, L2_loadrdgp, i64>;
+}
// When the Interprocedural Global Variable optimizer realizes that a certain
// global variable takes only two constant values, it shrinks the global to
// a boolean. Catch those loads here with the following patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi1, L2_loadrubgp>;
+ def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
+}
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+// Transfer global address into a register
+def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
+ def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
+}
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+let AddedComplexity = 30 in {
+ def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
+ def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
+}
-// Map from load(globaladdress) -> memb(#foo)
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=##global
let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
+// Load from a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>;
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
+ def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrhgp tglobaladdr:$global))>;
+ def: Loada_pat<load, i32, addrga, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrga, L4_loadrd_abs>;
+}
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrhgp tglobaladdr:$global))>;
+// Store to a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrga, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrga, S2_storerdabs>;
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadruhgp tglobaladdr:$global))>;
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
+}
-// Map from load(globaladdress) -> memw(#foo)
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrigp tglobaladdr:$global))>;
-
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
// Transfer global address into a register
let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
-isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in
+isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
"$dst = #$src1",
- [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
- Requires<[HasV4T]>;
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>;
// Transfer a block address into a register
def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
- (TFRI_V4 tblockaddress:$src1)>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if($src1) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if(!$src1) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if($src1.new) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if(!$src1.new) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 50, Predicates = [HasV4T] in
-def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
- (TFRI_V4 tglobaladdr:$src1)>,
- Requires<[HasV4T]>;
-
-
-// Load - Indirect with long offset: These instructions take global address
-// as an operand
-let isExtended = 1, opExtendable = 3, AddedComplexity = 40,
-validSubTargets = HasV4SubT in
-def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
- "$dst=memd($src1<<#$src2+##$offset)",
- [(set (i64 DoubleRegs:$dst),
- (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$offset))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40 in
-multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
-let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in
- def _lo_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
- !strconcat("$dst = ",
- !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
- [(set IntRegs:$dst,
- (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$offset)))))]>,
- Requires<[HasV4T]>;
-}
-
-defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
-defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
-defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>;
-defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
-defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
-defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>;
-defm LDriw_ind : LD_indirect_lo<"memw", load>;
-
-let AddedComplexity = 40 in
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
- (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40 in
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
- (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
- Requires<[HasV4T]>;
-
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storerbabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storerhabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+ (TFRI_V4 tblockaddress:$src1)>;
-def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storeriabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-}
-
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(i32 (load u0AlwaysExtPred:$src)),
- (L4_loadri_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
- (L4_loadrb_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
- (L4_loadrub_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
- (L4_loadrh_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
- (L4_loadruh_abs u0AlwaysExtPred:$src)>;
-}
-
-// Indexed store word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10 in
-def STriw_offset_ext_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
- "memw($src1+#$src2) = ##$src3",
- [(store (HexagonCONST32 tglobaladdr:$src3),
- (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))),
- (i64 (A4_combineir (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
- Requires<[HasV4T]>;
-
-def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
- (i64 (A4_combineir (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
- Requires<[HasV4T]>;
-
-
-// i8 -> i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextloadi8.
-let Predicates = [HasV4T], AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrb_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrub_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadrb_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
-
-def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrub_abs FoldGlobalAddr:$addr)))>;
+let AddedComplexity = 50 in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
-def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
-}
-// i16 -> i64 loads
+// i8/i16/i32 -> i64 loads
// We need a complexity of 120 here to override preceding handling of
-// zextloadi16.
+// zextload.
let AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadruh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
+ def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>;
+ def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
-def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadrh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
+ def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>;
+ def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
-def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadruh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>;
+ def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>;
+ def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>;
}
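// e.g. a zextloadi8 to i64 through Zext64 comes out as, roughly:
//   r0 = memub(##global)
//   r1:0 = combine(#0, r0)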
-// i32->i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextloadi32.
-let AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
-def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>;
-def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>;
-def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loada_pat<load, i32, addrgp, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>;
}
-// Indexed store double word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10 in
-def STrih_offset_ext_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
- "memh($src1+#$src2) = ##$src3",
- [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
- (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-// Map from store(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i64 DoubleRegs:$src1),
- FoldGlobalAddrGP:$addr),
- (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
- (i64 DoubleRegs:$src1)),
- (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
- (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
- (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-//let AddedComplexity = 100 in
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrgp, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdabs>;
+}
-def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
+def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
+
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+ IntRegs:$c, IntRegs:$d),
+ ".error \"Should never try to emit Insert4\"",
+ [(set (i64 DoubleRegs:$dst),
+ (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
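+// In C terms, the matched dag computes (a sketch of the intended semantics):
+//   res = (uint64_t)(a & 0xffff)         | ((uint64_t)(b & 0xffff) << 16)
+//       | ((uint64_t)(c & 0xffff) << 32) | ((uint64_t)d << 48);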
//===----------------------------------------------------------------------===//
// :raw form of boundscheck:hi:lo insns
//===----------------------------------------------------------------------===//
// A4_boundscheck_lo: Detect if a register is within bounds.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_boundscheck_lo: ALU64Inst <
(outs PredRegs:$Pd),
(ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -4146,7 +3945,7 @@ def A4_boundscheck_lo: ALU64Inst <
}
// A4_boundscheck_hi: Detect if a register is within bounds.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_boundscheck_hi: ALU64Inst <
(outs PredRegs:$Pd),
(ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -4165,13 +3964,13 @@ def A4_boundscheck_hi: ALU64Inst <
let Inst{12-8} = Rtt;
}
-let hasSideEffects = 0 in
+let hasSideEffects = 0, isAsmParserOnly = 1 in
def A4_boundscheck : MInst <
(outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
"$Pd=boundscheck($Rs,$Rtt)">;
// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
-let isPredicateLate = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isPredicateLate = 1, hasSideEffects = 0 in
def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
(ins DoubleRegs:$Rs, IntRegs:$Rt),
"$Pd = tlbmatch($Rs, $Rt)",
@@ -4198,7 +3997,7 @@ def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
// really do a load.
-let hasSideEffects = 1, mayLoad = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 1, mayLoad = 0 in
def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
"dcfetch($Rs + #$u11_3)",
[(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
@@ -4220,12 +4019,12 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1, validSubTargets = HasV4SubT in
+ isTerminator = 1 in
class CJInst_tstbit_R0<string px, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = tstbit($Rs, #0); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4248,14 +4047,14 @@ class CJInst_tstbit_R0<string px, bit np, string tnt>
let Inst{7-1} = r9_2{8-2};
}
-let Defs = [PC, P0], Uses = [P0], isCodeGenOnly = 0 in {
+let Defs = [PC, P0], Uses = [P0] in {
def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
}
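// Each of these is a packed compare-and-jump pair; e.g. J4_tstbit0_tp0_jump_t
// prints as
//   p0 = tstbit(r7, #0); if (p0.new) jump:t target
// (register and target chosen here only for illustration).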
-let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in {
+let Defs = [PC, P1], Uses = [P1] in {
def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
@@ -4266,12 +4065,12 @@ let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in {
let isBranch = 1, hasSideEffects = 0,
isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
- opExtendable = 2, isTerminator = 1, validSubTargets = HasV4SubT in
+ opExtendable = 2, isTerminator = 1 in
class CJInst_RR<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, $Rt); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<4> Rt;
bits<11> r9_2;
@@ -4314,21 +4113,18 @@ multiclass T_pnp_CJInst_RR<string op>{
defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
}
// TypeCJ Instructions compare RR and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_RR<"eq">;
defm gt : T_pnp_CJInst_RR<"gt">;
defm gtu : T_pnp_CJInst_RR<"gtu">;
-}
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, isTerminator = 1,
- validSubTargets = HasV4SubT in
+ opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
class CJInst_RU5<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, #$U5); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<5> U5;
bits<11> r9_2;
@@ -4371,21 +4167,19 @@ multiclass T_pnp_CJInst_RU5<string op>{
defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
}
// TypeCJ Instructions compare RI and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_RU5<"eq">;
defm gt : T_pnp_CJInst_RU5<"gt">;
defm gtu : T_pnp_CJInst_RU5<"gtu">;
-}
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1, validSubTargets = HasV4SubT in
+ isTerminator = 1 in
class CJInst_Rn1<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs,#-1); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4427,16 +4221,13 @@ multiclass T_pnp_CJInst_Rn1<string op>{
defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
}
// TypeCJ Instructions compare -1 and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_Rn1<"eq">;
defm gt : T_pnp_CJInst_Rn1<"gt">;
-}
// J4_jumpseti: Direct unconditional jump and set register to immediate.
let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in
+ opExtentAlign = 2, opExtendable = 2 in
def J4_jumpseti: CJInst <
(outs IntRegs:$Rd),
(ins u6Imm:$U6, brtarget:$r9_2),
@@ -4456,8 +4247,7 @@ def J4_jumpseti: CJInst <
// J4_jumpsetr: Direct unconditional jump and transfer register.
let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in
+ opExtentAlign = 2, opExtendable = 2 in
def J4_jumpsetr: CJInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, brtarget:$r9_2),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 5674aa3ccd83..337f4ea2184a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -15,7 +15,34 @@
// XTYPE/MPY
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 0 in
+// Rdd[+]=vrmpybsu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+ def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
+ def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
+
+  // Rdd[+]=vrmpybu(Rss,Rtt)
+ def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
+ def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
+
+ def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
+ def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
+}
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+let Predicates = [HasV5T] in {
+ def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
+ def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
+
+ // Rxx+=vmpyb[s]u(Rs,Rt)
+ def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
+ def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
+
+ // Rd=vaddhub(Rss,Rtt):sat
+ let hasNewValue = 1, opNewValue = 0 in
+ def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
+}
+
def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
[(set I64:$dst,
(sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)),
@@ -25,41 +52,43 @@ def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
let Inst{13-8} = src2;
}
-let isCodeGenOnly = 0 in
+let isAsmParserOnly = 1 in
+def S2_asr_i_p_rnd_goodsyntax
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asrrnd($src1, #$src2)">;
+
def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
Requires<[HasV5T]> {
let Inst{13,7,4} = 0b111;
}
-let isCodeGenOnly = 0 in
def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
Requires<[HasV5T]> {
let Inst{20,13,7,4} = 0b1111;
}
-def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
- SDTCisVT<0, f32>,
- SDTCisPtrTy<1>]>;
-def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (f32 IntRegs:$dst),
- (HexagonFCONST32 tglobaladdr:$global))]>,
- Requires<[HasV5T]>;
+ "$dst = CONST32(#$global)",
+ [(set F32:$dst,
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
- "$dst = CONST64(#$src1)",
- [(set DoubleRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = CONST64(#$src1)",
+ [(set F64:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
- "$dst = CONST32(#$src1)",
- [(set IntRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = CONST32(#$src1)",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
// Transfer immediate float.
// Only works with single precision fp value.
@@ -68,35 +97,33 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
// Make sure the complexity is higher than that of the CONST32 patterns in
// HexagonInstrInfo.td.
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
-isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
-isCodeGenOnly = 1 in
+ isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
+ isCodeGenOnly = 1 in
def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
- "$dst = #$src1",
- [(set IntRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = #$src1",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
let isExtended = 1, opExtendable = 2, isPredicated = 1,
-hasSideEffects = 0, validSubTargets = HasV5SubT in
+ hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in
def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
- "if ($src1) $dst = #$src2",
- []>,
- Requires<[HasV5T]>;
+ "if ($src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
-let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
-hasSideEffects = 0, validSubTargets = HasV5SubT in
+let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1,
+ isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in
def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
- "if (!$src1) $dst =#$src2",
- []>,
- Requires<[HasV5T]>;
+ "if (!$src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
SDTCisVT<1, i64>]>;
def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-let hasNewValue = 1, validSubTargets = HasV5SubT, isCodeGenOnly = 0 in
+let hasNewValue = 1, validSubTargets = HasV5SubT in
def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
"$Rd = popcount($Rss)",
[(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
@@ -112,6 +139,14 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
let Inst{20-16} = Rss;
}
+defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
+
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
let isFP = 1, hasNewValue = 1, opNewValue = 0 in
class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: MInst<(outs IntRegs:$Rd),
@@ -134,27 +169,51 @@ class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
let Inst{4-0} = Rd;
}
-let isCommutable = 1, isCodeGenOnly = 0 in {
+let isCommutable = 1 in {
def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
}
-let isCodeGenOnly = 0 in
def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+def: Pat<(f32 (fadd F32:$src1, F32:$src2)),
+ (F2_sfadd F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fsub F32:$src1, F32:$src2)),
+ (F2_sfsub F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fmul F32:$src1, F32:$src2)),
+ (F2_sfmpy F32:$src1, F32:$src2)>;
+
+let Itinerary = M_tc_3x_SLOT23 in {
def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
}
-let isCodeGenOnly = 0 in {
+let AddedComplexity = 100, Predicates = [HasV5T] in {
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+}
+
def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
-}
// F2_sfrecipa: Reciprocal approximation for division.
let isPredicateLate = 1, isFP = 1,
-hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+hasSideEffects = 0, hasNewValue = 1 in
def F2_sfrecipa: MInst <
(outs IntRegs:$Rd, PredRegs:$Pe),
(ins IntRegs:$Rs, IntRegs:$Rt),
@@ -210,7 +269,6 @@ class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
let Inst{27-21} = 0b0111111;
}
-let isCodeGenOnly = 0 in {
def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
@@ -220,6 +278,250 @@ def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasV5T] in
+multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (IntMI F32:$src1, F32:$src2)>;
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+}
+
+defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
+defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
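+// Illustrative expansion (sketch): the f32 half produced by
+// 'defm : unord_Pats<setuge, F2_sfcmpge, F2_dfcmpge>' is
+//   def: Pat<(i1 (setuge F32:$src1, F32:$src2)),
+//            (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+//                   (F2_sfcmpge F32:$src1, F32:$src2))>;
+// i.e. an unordered comparison is the ordered comparison OR'ed with the
+// either-operand-is-NaN test.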
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
+// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
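+// Illustrative expansion (sketch): 'defm : eq_ordgePats<setoeq, F2_sfcmpeq,
+// F2_dfcmpeq>' yields, among others,
+//   def: Pat<(i1 (seteq (i1 (setoeq F32:$src1, F32:$src2)), 0)),
+//            (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+// so testing a compare result against 0/1 folds into the compare itself,
+// negated where necessary.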
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
+// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// seto is the inverse of setuo; see http://llvm.org/docs/LangRef.html#i_fcmp
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seto F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
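+// In C terms (sketch): seto(a, b) == !isnan(a) && !isnan(b), which is
+// exactly !setuo(a, b) -- hence the C2_not around the cmp.uo result above.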
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpgt F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setult F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpgt F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setole F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setole F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+// rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpge F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setule F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpge F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setone F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setone F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
+
+// Unordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+ def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+ def: Pat<(i1 (setune F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>;
+ def: Pat<(i1 (setune F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1,
+ (CONST64_Float_Real fpimm:$src2))))>;
+}
+
+// Besides set[o|u][comparisons], we also need set[comparisons].
+let Predicates = [HasV5T] in {
+ // lt.
+ def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // le.
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setle F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setle F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // ne.
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setne F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setne F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
}
// F2 convert template classes:
@@ -302,7 +604,6 @@ class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
}
// Convert single precision to double precision and vice-versa.
-let isCodeGenOnly = 0 in {
def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
fextend, F64, F32>;
@@ -364,10 +665,9 @@ let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
fp_to_sint, I32, F32>;
}
-}
// Fix up radicand.
-let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+let isFP = 1, hasNewValue = 1 in
def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
"$Rd = sffixupr($Rs)",
[], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
@@ -382,6 +682,14 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
let Inst{4-0} = Rd;
}
+// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
+let Predicates = [HasV5T] in {
+ def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+ def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+ def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+ def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
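+// Sketch: these bitcasts only reinterpret the bits in place, so each output
+// pattern reuses the source register directly (e.g. (i32 (bitconvert
+// F32:$src)) rewrites to $src retyped as i32) and no copy is emitted.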
+
// F2_sffma: Floating-point fused multiply add.
let isFP = 1, hasNewValue = 1 in
class T_sfmpy_acc <bit isSub, bit isLib>
@@ -406,15 +714,16 @@ class T_sfmpy_acc <bit isSub, bit isLib>
let Inst{4-0} = Rx;
}
-let isCodeGenOnly = 0 in {
def F2_sffma: T_sfmpy_acc <0, 0>;
def F2_sffms: T_sfmpy_acc <1, 0>;
def F2_sffma_lib: T_sfmpy_acc <0, 1>;
def F2_sffms_lib: T_sfmpy_acc <1, 1>;
-}
+
+def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)),
+ (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
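+// Note (sketch): ISD::fma(a, b, c) computes a*b + c, and F2_sffma
+// accumulates into its first operand (Rx += sfmpy(Rs, Rt)), so the pattern
+// above routes the addend $src1 into the accumulator position.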
// Floating-point fused multiply add w/ additional scaling (2**pu).
-let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+let isFP = 1, hasNewValue = 1 in
def F2_sffma_sc: MInst <
(outs IntRegs:$Rx),
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
@@ -437,11 +746,147 @@ def F2_sffma_sc: MInst <
let Inst{4-0} = Rx;
}
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+         (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src3, F32:$src4)>,
+     Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = MUX_ir_f(p0, r1, #i)
+def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3),
+ (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = MUX_ri_f(p0, #i, r1)
+def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3),
+ (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(i32 (fp_to_sint F64:$src1)),
+ (LoReg (F2_conv_df2d_chop F64:$src1))>,
+ Requires<[HasV5T]>;
+
+//===----------------------------------------------------------------------===//
+// :natural forms of vasrh and vasrhub insns
+//===----------------------------------------------------------------------===//
+// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round,
+// saturate, and pack.
+let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ASRHUB<bit isSat>
+ : SInst <(outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
+ [], "", S_2op_tc_2_SLOT23>,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1000011;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-6} = 0b10;
+ let Inst{5} = isSat;
+ let Inst{4-0} = Rd;
+ }
+
+def S5_asrhub_rnd_sat : T_ASRHUB <0>;
+def S5_asrhub_sat : T_ASRHUB <1>;
+
+let isAsmParserOnly = 1 in
+def S5_asrhub_rnd_sat_goodsyntax
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
+
+// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
+let hasSideEffects = 0 in
+def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss, #$u4):raw">,
+ Requires<[HasV5T]> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0000001;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ }
+
+let isAsmParserOnly = 1 in
+def S5_vasrhrnd_goodsyntax
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
+
+// Floating point reciprocal square root approximation
+let Uses = [USR], isPredicateLate = 1, isFP = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
+ validSubTargets = HasV5SubT in
+def F2_sfinvsqrta: SInst <
+ (outs IntRegs:$Rd, PredRegs:$Pe),
+ (ins IntRegs:$Rs),
+ "$Rd, $Pe = sfinvsqrta($Rs)" > ,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<2> Pe;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1011111;
+ let Inst{20-16} = Rs;
+ let Inst{7} = 0b0;
+ let Inst{6-5} = Pe;
+ let Inst{4-0} = Rd;
+ }
+
+// Complex multiply 32x16
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;
+ def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>;
+}
+
// Classify floating-point value
-let isFP = 1, isCodeGenOnly = 0 in
+let isFP = 1 in
def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
-let isFP = 1, isCodeGenOnly = 0 in
+let isFP = 1 in
def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
"$Pd = dfclass($Rss, #$u5)",
[], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
@@ -459,7 +904,6 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
}
// Instructions to create floating point constant
-let hasNewValue = 1, opNewValue = 0 in
class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
: ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
"$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
@@ -476,546 +920,13 @@ class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
let Inst{4-0} = dst;
}
-let isCodeGenOnly = 0 in {
+let hasNewValue = 1, opNewValue = 0 in {
def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
-def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
-def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
}
-// Convert single precision to double precision and vice-versa.
-def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2df($src)",
- [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2sf($src)",
- [(set IntRegs:$dst, (fround DoubleRegs:$src))]>,
- Requires<[HasV5T]>;
-
-
-// Load.
-def LDrid_f : LDInst<(outs DoubleRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memd($addr)",
- [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>,
- Requires<[HasV5T]>;
-
-
-let AddedComplexity = 20 in
-def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s11_3Imm:$offset),
- "$dst = memd($src1+#$offset)",
- [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1,
- s11_3ImmPred:$offset))))]>,
- Requires<[HasV5T]>;
-
-def LDriw_f : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr), "$dst = memw($addr)",
- [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>,
- Requires<[HasV5T]>;
-
-
-let AddedComplexity = 20 in
-def LDriw_indexed_f : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_2Imm:$offset),
- "$dst = memw($src1+#$offset)",
- [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1,
- s11_2ImmPred:$offset))))]>,
- Requires<[HasV5T]>;
-
-// Store.
-def STriw_f : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1",
- [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 10 in
-def STriw_indexed_f : STInst<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3",
- [(store (f32 IntRegs:$src3),
- (add IntRegs:$src1, s11_2ImmPred:$src2))]>,
- Requires<[HasV5T]>;
-
-def STrid_f : STInst<(outs),
- (ins MEMri:$addr, DoubleRegs:$src1),
- "memd($addr) = $src1",
- [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>,
- Requires<[HasV5T]>;
-
-// Indexed store double word.
-let AddedComplexity = 10 in
-def STrid_indexed_f : STInst<(outs),
- (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
- "memd($src1+#$src2) = $src3",
- [(store (f64 DoubleRegs:$src3),
- (add IntRegs:$src1, s11_3ImmPred:$src2))]>,
- Requires<[HasV5T]>;
-
-
-// Add
-let isCommutable = 1 in
-def fADD_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfadd($src1, $src2)",
- [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfadd($src1, $src2)",
- [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-def fSUB_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfsub($src1, $src2)",
- [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfsub($src1, $src2)",
- [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fMUL_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmpy($src1, $src2)",
- [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfmpy($src1, $src2)",
- [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-// Compare.
-let isCompare = 1 in {
-multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> {
- def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst,
- (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>,
- Requires<[HasV5T]>;
-}
-
-multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> {
- def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst,
- (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>,
- Requires<[HasV5T]>;
-}
-}
-
-defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>;
-defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>;
-defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>;
-defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>;
-defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>;
-defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>;
-
-defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>;
-defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>;
-defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>;
-defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>;
-defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>;
-defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>;
-
-// olt.
-def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- (f64 DoubleRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-// gt.
-def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1),
- (f64 (CONST64_Float_Real fpimm:$src2))))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>,
- Requires<[HasV5T]>;
-
-// ult.
-def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- (f64 DoubleRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-// le.
-// rs <= rt -> rt >= rs.
-def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-
-// Rss <= Rtt -> Rtt >= Rss.
-def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// rs <= rt -> rt >= rs.
-def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// Rss <= Rtt -> Rtt >= Rss.
-def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// ne.
-def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1,
- (f64 (CONST64_Float_Real fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1,
- (f64 (CONST64_Float_Real fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-// Convert Integer to Floating Point.
-def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_d2sf($src)",
- [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_ud2sf($src)",
- [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_uw2sf($src)",
- [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_w2sf($src)",
- [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_d2df($src)",
- [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_ud2df($src)",
- [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_uw2df($src)",
- [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_w2df($src)",
- [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-// Convert Floating Point to Integer - default.
-def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2uw($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2w($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2uw($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2w($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2d($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2ud($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2d($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2ud($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20 in
-def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2uw($src)",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2w($src)",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2uw($src)",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2w($src)",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2d($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2ud($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2d($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2ud($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-
-
-// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
- (i32 (A2_tfr IntRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
- (f32 (A2_tfr IntRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
- (i64 (A2_tfrp DoubleRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
- (f64 (A2_tfrp DoubleRegs:$src))>,
- Requires<[HasV5T]>;
-
-def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += sfmpy($src2, $src3)",
- [(set (f32 IntRegs:$dst),
- (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
- "$src1 = $dst">,
- Requires<[HasV5T]>;
-
-
-// Floating point max/min.
-
-let AddedComplexity = 100 in
-def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmax($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100 in
-def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmin($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Pseudo instruction to encode a set of conditional transfers.
-// This instruction is used instead of a mux and trades-off codesize
-// for performance. We conduct this transformation optimistically in
-// the hope that these instructions get promoted to dot-new transfers.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "Error; should not emit",
- [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- fpimm:$src2,
- fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (f32 IntRegs:$src3),
- (f32 IntRegs:$src4)),
- (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
- IntRegs:$src3)>, Requires<[HasV5T]>;
-
-def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (f64 DoubleRegs:$src3),
- (f64 DoubleRegs:$src4)),
- (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1),
- DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
- (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = TFR_condset_ri(p0, r1, #i)
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
- (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
- (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
-
-def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
- (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
- Requires<[HasV5T]>;
+def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
+def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
def : Pat <(fabs (f32 IntRegs:$src1)),
(S2_clrbit_i (f32 IntRegs:$src1), 31)>,
@@ -1024,13 +935,3 @@ def : Pat <(fabs (f32 IntRegs:$src1)),
def : Pat <(fneg (f32 IntRegs:$src1)),
(S2_togglebit_i (f32 IntRegs:$src1), 31)>,
Requires<[HasV5T]>;
-
-/*
-def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
- Requires<[HasV5T]>;
-
-def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
- Requires<[HasV5T]>;
- */
diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td
new file mode 100644
index 000000000000..f4fb946d5bad
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -0,0 +1,483 @@
+//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon Vector instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
+def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
+def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
+def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
+def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
+def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
+def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
+def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
+
+// Bit convert vector types.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_32<v2i16, v4i8>;
+
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+defm : bitconvert_64<v8i8, v4i16>;
+defm : bitconvert_64<v8i8, v2i32>;
+defm : bitconvert_64<v4i16, v2i32>;
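+// Illustrative expansion (sketch): 'defm : bitconvert_32<v4i8, i32>' produces
+//   def : Pat <(i32 (bitconvert (v4i8 IntRegs:$src))), (i32 IntRegs:$src)>;
+//   def : Pat <(v4i8 (bitconvert (i32 IntRegs:$src))), (v4i8 IntRegs:$src)>;
+// i.e. both directions of the cast are pure register reinterpretations.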
+
+
+// Vector shift support. Vector shifts in Hexagon differ from LLVM's
+// internal representation: LLVM assumes all vector shifts have the form
+//   <VT> = SHL/SRA/SRL <VT> by <VT>
+// while Hexagon uses the form
+//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
+// As a result, special care is needed to guarantee correctness and
+// performance.
+class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
+ [(set (v4i16 DoubleRegs:$dst),
+ (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
+ bits<4> src2;
+ let Inst{11-8} = src2;
+}
+
+class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
+ [(set (v2i32 DoubleRegs:$dst),
+ (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
+ bits<5> src2;
+ let Inst{12-8} = src2;
+}
+
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
+def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
+def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
+
+def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
+def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
+def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
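+// For example (sketch): an IR shift 'ashr <4 x i16> %v, <i16 3, i16 3,
+// i16 3, i16 3>' reaches selection with its splatted amount built by
+// HexagonVSPLATH, and the pattern above rewrites it to 'S2_asr_i_vh $v, #3'.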
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift words by register
+def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
+def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
+def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
+def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
+
+// Vector shift halfwords by register
+def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
+def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
+def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
+def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon does not have a vector multiply with C semantics. Instead we
+// generate a pseudo instruction that is expanded into two scalar MPYI
+// instructions by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW\"",
+ [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW_ACC\"",
+ [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ "$Rd = $Rx">;
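+// Sketch of the expansion (assuming the per-half split described above):
+// VMULW becomes two independent 32-bit multiplies, roughly
+//   Rd.lo = mpyi(Rs.lo, Rt.lo)
+//   Rd.hi = mpyi(Rs.hi, Rt.hi)
+// and VMULW_ACC accumulates each half the same way.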
+
+// Adds two v4i8: Hexagon does not have an insn for this one, so we
+// use the double add v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8: Hexagon does not have an insn for this one, so we
+// use the double sub v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// No 32 bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32 bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x < y --> cmp.gt(y, x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss, Rtt) -> !vcmpw.eq(Rss, Rtt).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtrunewh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 values and returns a v2i32. We use the saturating
+// multiply here, since Hexagon does not provide a non-saturating vector
+// multiply, and saturation cannot affect a result held in double the
+// precision of the operands.
+
+// Multiplies two v2i16 vectors: since Hexagon has no multiply with C
+// semantics for this type, the pattern uses the halfword multiply vmpyh,
+// which takes two v2i16 operands and returns a v2i32. The result is then
+// truncated back into a v2i16, reproducing C's unsigned wrap-around
+// semantics.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
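+// Worked example (illustrative): for a lane holding 0xFFFF * 0x0002, vmpyh
+// computes the full i32 product (-1 * 2 = 0xFFFFFFFE); truncating to the
+// low halfword gives 0xFFFE, which equals (0xFFFF * 2) mod 2^16 -- the
+// unsigned wrap-around result C requires.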
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+
+class shuffler<SDNode Op, string Str>
+ : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
+ "$a = " # Str # "($b, $c)",
+ [(set (i64 DoubleRegs:$a),
+ (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
+ "", S_3op_tc_1_SLOT23>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
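+// For instance (sketch), a store of '(v4i8 (trunc v4i16 %v))' matches
+// truncstorev4i8 above: the halfwords are packed to bytes with S2_vtrunehb
+// and stored as a single 32-bit word. The PatFrag keys on the store's
+// memory VT, not on the IR spelling shown here.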
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
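The extending loads fetch both bytes with one halfword load and widen them in registers. A C sketch of the sign-extending v2i8 to v2i16 case (the helper name is ours):

    #include <stdint.h>
    #include <string.h>

    /* L2_loadrh_io fetches both bytes at once; S2_vsxtbh then widens each
       byte to 16 bits, and LoReg keeps the low v2i16 of the v4i16 result. */
    static void sextload_v2i8(const void *p, int16_t out[2]) {
        int8_t bytes[2];
        memcpy(bytes, p, sizeof bytes);
        out[0] = bytes[0];
        out[1] = bytes[1];
    }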
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 7d3f9d92cbd4..4275230ba717 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -13,3495 +13,1273 @@
// March 4, 2008
//===----------------------------------------------------------------------===//
-//
-// ALU 32 types.
-//
+class T_I_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is),
+ (MI imm:$Is)>;
-class qi_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_siu8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_siu9<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_qisisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_ALU32_qis8si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
- IntRegs:$src3))]>;
-
-class si_ALU32_qisis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_ALU32_qis8s8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
-
-class si_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sis16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_s10si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
- [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
-
-class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
- !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
- !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_s16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class di_ALU32_s8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class di_ALU64_di<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_ALU32_si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_ALU32_si_tfr<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+class T_R_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs),
+ (MI I32:$Rs)>;
-//
-// ALU 64 types.
-//
+class T_P_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs),
+ (MI DoubleRegs:$Rs)>;
+
+class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat<(IntID Imm1:$Is, Imm2:$It),
+ (MI Imm1:$Is, Imm2:$It)>;
+
+class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID I32:$Rs, ImmPred:$It),
+ (MI I32:$Rs, ImmPred:$It)>;
+
+class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID ImmPred:$Is, I32:$Rt),
+ (MI ImmPred:$Is, I32:$Rt)>;
+
+class T_PI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I64:$Rs, imm:$It),
+ (MI DoubleRegs:$Rs, imm:$It)>;
+
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I32:$Rs, I64:$Rt),
+ (MI I32:$Rs, DoubleRegs:$Rt)>;
+
+class T_RR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt),
+ (MI I32:$Rs, I32:$Rt)>;
+
+class T_PP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-class si_ALU64_si_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_ALU64_sidi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_qididi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$src3))]>;
-
-class di_ALU64_sisi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_ALU64_didi_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class qi_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_ALU64_sisi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It),
+ (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>;
-//
-// SInst classes.
-//
+class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is),
+ (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>;
-class qi_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class qi_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class di_SInst_di_sat<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_SInst_di_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class di_SInst_disi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_didi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class si_SInst_sisiu3<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_SInst_diu5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_disi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_sidi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_SInst_disisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class di_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_siu5<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_SInst_siu6<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_si_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class di_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_diu6<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5u5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
-
-class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1, IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class si_SInst_sisidi<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6u6<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2, u6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class di_SInst_dididi<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_diu6u6<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
- u6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
- imm:$src3))]>;
-
-class di_SInst_didiqi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3))]>;
-
-class di_SInst_didiu3<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- u3Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- imm:$src3))]>;
-
-class di_SInst_didisi_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-
-class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1, imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
+class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs),
+ (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>;
+class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
+ (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
-//
-// MInst classes.
-//
+class T_RII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
+ (MI I32:$Rs, imm:$It, imm:$Iu)>;
-class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_disisi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_s8s8<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
-
-class si_MInst_sis9<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_MInst_sisi<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_lh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hl<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_ll<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-
-class si_MInst_sisi_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_up<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class si_SInst_sisi_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_didi_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u4Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- u8Imm:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- u8Imm:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-
-class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_didi<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-
-class T_RI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID (i32 IntRegs:$Rs), imm:$It),
- (MI IntRegs:$Rs, imm:$It)>;
+class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
+ (MI imm:$It, I32:$Rs, imm:$Iu)>;
-//
-// LDInst classes.
-//
-let mayLoad = 1, hasSideEffects = 0 in
-class di_LDInstPI_diu4<string opc, Intrinsic IntID>
- : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
- (ins IntRegs:$src1, IntRegs:$src2, CtrRegs:$src3, s4Imm:$offset),
- "$dst2 = memd($src1++#$offset:circ($src3))",
- [],
- "$src1 = $dst">;
+class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
+ (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-/********************************************************************
-* ALU32/ALU *
-*********************************************************************/
+class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
+ (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-// ALU32 / ALU / Add.
-def HEXAGON_A2_add:
- si_ALU32_sisi <"add", int_hexagon_A2_add>;
-def HEXAGON_A2_addi:
- si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
-
-// ALU32 / ALU / Logical operations.
-def HEXAGON_A2_and:
- si_ALU32_sisi <"and", int_hexagon_A2_and>;
-def HEXAGON_A2_andir:
- si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
-def HEXAGON_A2_not:
- si_ALU32_si <"not", int_hexagon_A2_not>;
-def HEXAGON_A2_or:
- si_ALU32_sisi <"or", int_hexagon_A2_or>;
-def HEXAGON_A2_orir:
- si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
-def HEXAGON_A2_xor:
- si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
-
-// ALU32 / ALU / Negate.
-def HEXAGON_A2_neg:
- si_ALU32_si <"neg", int_hexagon_A2_neg>;
-
-// ALU32 / ALU / Subtract.
-def HEXAGON_A2_sub:
- si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
-def HEXAGON_A2_subri:
- si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
-
-// ALU32 / ALU / Transfer Immediate.
-def HEXAGON_A2_tfril:
- si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
-def HEXAGON_A2_tfrih:
- si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
-def HEXAGON_A2_tfrsi:
- si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
-def HEXAGON_A2_tfrpi:
- di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
-
-// ALU32 / ALU / Transfer Register.
-def HEXAGON_A2_tfr:
- si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
+ (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
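+
+// For illustration (comment only, not an additional pattern): with the
+// classes above, a definition appearing later in this file such as
+//   def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
+// is shorthand for
+//   def : Pat <(int_hexagon_M2_mpy_acc_ll_s0 I32:$Rs, I32:$Rt, I32:$Ru),
+//              (M2_mpy_acc_ll_s0 I32:$Rs, I32:$Rt, I32:$Ru)>;
+// i.e. a call to the intrinsic selects the instruction directly, with
+// register and immediate operands passed straight through.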
-/********************************************************************
-* ALU32/PERM *
-*********************************************************************/
+class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>;
-// ALU32 / PERM / Combine.
-def HEXAGON_A2_combinew:
- di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
-def HEXAGON_A2_combine_hh:
- si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
-def HEXAGON_A2_combine_lh:
- si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
-def HEXAGON_A2_combine_hl:
- si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
-def HEXAGON_A2_combine_ll:
- si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
-def HEXAGON_A2_combineii:
- di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
-
-// ALU32 / PERM / Mux.
-def HEXAGON_C2_mux:
- si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
-def HEXAGON_C2_muxri:
- si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
-def HEXAGON_C2_muxir:
- si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
-def HEXAGON_C2_muxii:
- si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
-
-// ALU32 / PERM / Shift halfword.
-def HEXAGON_A2_aslh:
- si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
-def HEXAGON_A2_asrh:
- si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
-def SI_to_SXTHI_asrh:
- si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
-
-// ALU32 / PERM / Sign/zero extend.
-def HEXAGON_A2_sxth:
- si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
-def HEXAGON_A2_sxtb:
- si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
-def HEXAGON_A2_zxth:
- si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
-def HEXAGON_A2_zxtb:
- si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+class T_PII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
+ (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>;
-/********************************************************************
-* ALU32/PRED *
-*********************************************************************/
+class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>;
-// ALU32 / PRED / Compare.
-def HEXAGON_C2_cmpeq:
- qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
-def HEXAGON_C2_cmpeqi:
- qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
-def HEXAGON_C2_cmpgei:
- qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
-def HEXAGON_C2_cmpgeui:
- qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
-def HEXAGON_C2_cmpgt:
- qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
-def HEXAGON_C2_cmpgti:
- qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
-def HEXAGON_C2_cmpgtu:
- qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
-def HEXAGON_C2_cmpgtui:
- qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
-def HEXAGON_C2_cmplt:
- qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
-def HEXAGON_C2_cmpltu:
- qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>;
-/********************************************************************
-* ALU32/VH *
-*********************************************************************/
+class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>;
-// ALU32 / VH / Vector add halfwords.
-// Rd32=vadd[u]h(Rs32,Rt32:sat]
-def HEXAGON_A2_svaddh:
- si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
-def HEXAGON_A2_svaddhs:
- si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
-def HEXAGON_A2_svadduhs:
- si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
-
-// ALU32 / VH / Vector average halfwords.
-def HEXAGON_A2_svavgh:
- si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
-def HEXAGON_A2_svavghs:
- si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
-def HEXAGON_A2_svnavgh:
- si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
-
-// ALU32 / VH / Vector subtract halfwords.
-def HEXAGON_A2_svsubh:
- si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
-def HEXAGON_A2_svsubhs:
- si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
-def HEXAGON_A2_svsubuhs:
- si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>;
-/********************************************************************
-* ALU64/ALU *
-*********************************************************************/
+class T_PR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt),
+ (MI DoubleRegs:$Rs, I32:$Rt)>;
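+
+// Note that 64-bit (I64) values in the source patterns above are matched
+// as DoubleRegs operands in the output instruction, and T_PPQ_pat matches
+// its predicate argument as (i32 PredRegs:$Ru). For illustration (comment
+// only), a later definition
+//   def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
+// is shorthand for
+//   def : Pat <(int_hexagon_A2_vraddub_acc I64:$Rs, I64:$Rt, I64:$Ru),
+//              (A2_vraddub_acc DoubleRegs:$Rs, DoubleRegs:$Rt,
+//                              DoubleRegs:$Ru)>;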
-// ALU64 / ALU / Add.
-def HEXAGON_A2_addp:
- di_ALU64_didi <"add", int_hexagon_A2_addp>;
-def HEXAGON_A2_addsat:
- si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
-
-// ALU64 / ALU / Add halfword.
-// Even though the definition says hl, it should be lh -
-//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
-def HEXAGON_A2_addh_l16_hl:
- si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
-def HEXAGON_A2_addh_l16_ll:
- si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
-
-def HEXAGON_A2_addh_l16_sat_hl:
- si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
-def HEXAGON_A2_addh_l16_sat_ll:
- si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
-
-def HEXAGON_A2_addh_h16_hh:
- si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
-def HEXAGON_A2_addh_h16_hl:
- si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
-def HEXAGON_A2_addh_h16_lh:
- si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
-def HEXAGON_A2_addh_h16_ll:
- si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
-
-def HEXAGON_A2_addh_h16_sat_hh:
- si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
-def HEXAGON_A2_addh_h16_sat_hl:
- si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
-def HEXAGON_A2_addh_h16_sat_lh:
- si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
-def HEXAGON_A2_addh_h16_sat_ll:
- si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
-
-// ALU64 / ALU / Compare.
-def HEXAGON_C2_cmpeqp:
- qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
-def HEXAGON_C2_cmpgtp:
- qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
-def HEXAGON_C2_cmpgtup:
- qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
-
-// ALU64 / ALU / Logical operations.
-def HEXAGON_A2_andp:
- di_ALU64_didi <"and", int_hexagon_A2_andp>;
-def HEXAGON_A2_orp:
- di_ALU64_didi <"or", int_hexagon_A2_orp>;
-def HEXAGON_A2_xorp:
- di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
-
-// ALU64 / ALU / Maximum.
-def HEXAGON_A2_max:
- si_ALU64_sisi <"max", int_hexagon_A2_max>;
-def HEXAGON_A2_maxu:
- si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
-
-// ALU64 / ALU / Minimum.
-def HEXAGON_A2_min:
- si_ALU64_sisi <"min", int_hexagon_A2_min>;
-def HEXAGON_A2_minu:
- si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
-
-// ALU64 / ALU / Subtract.
-def HEXAGON_A2_subp:
- di_ALU64_didi <"sub", int_hexagon_A2_subp>;
-def HEXAGON_A2_subsat:
- si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
-
-// ALU64 / ALU / Subtract halfword.
-// Even though the definition says hl, it should be lh -
-//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
-def HEXAGON_A2_subh_l16_hl:
- si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
-def HEXAGON_A2_subh_l16_ll:
- si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
-
-def HEXAGON_A2_subh_l16_sat_hl:
- si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
-def HEXAGON_A2_subh_l16_sat_ll:
- si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
-
-def HEXAGON_A2_subh_h16_hh:
- si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
-def HEXAGON_A2_subh_h16_hl:
- si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
-def HEXAGON_A2_subh_h16_lh:
- si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
-def HEXAGON_A2_subh_h16_ll:
- si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
-
-def HEXAGON_A2_subh_h16_sat_hh:
- si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
-def HEXAGON_A2_subh_h16_sat_hl:
- si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
-def HEXAGON_A2_subh_h16_sat_lh:
- si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
-def HEXAGON_A2_subh_h16_sat_ll:
- si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
-
-// ALU64 / ALU / Transfer register.
-def HEXAGON_A2_tfrp:
- di_ALU64_di <"", int_hexagon_A2_tfrp>;
+class T_D_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID (F64:$Rs)),
+ (MI (F64:$Rs))>;
-/********************************************************************
-* ALU64/BIT *
-*********************************************************************/
+class T_DI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F64:$Rs, ImmPred:$It),
+ (MI F64:$Rs, ImmPred:$It)>;
-// ALU64 / BIT / Masked parity.
-def HEXAGON_S2_parityp:
- si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+class T_F_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs),
+ (MI F32:$Rs)>;
-/********************************************************************
-* ALU64/PERM *
-*********************************************************************/
+class T_FI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F32:$Rs, ImmPred:$It),
+ (MI F32:$Rs, ImmPred:$It)>;
-// ALU64 / PERM / Vector pack high and low halfwords.
-def HEXAGON_S2_packhl:
- di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+class T_FF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt),
+ (MI F32:$Rs, F32:$Rt)>;
-/********************************************************************
-* ALU64/VB *
-*********************************************************************/
+class T_DD_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F64:$Rs, F64:$Rt),
+ (MI F64:$Rs, F64:$Rt)>;
-// ALU64 / VB / Vector add unsigned bytes.
-def HEXAGON_A2_vaddub:
- di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
-def HEXAGON_A2_vaddubs:
- di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
-
-// ALU64 / VB / Vector average unsigned bytes.
-def HEXAGON_A2_vavgub:
- di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
-def HEXAGON_A2_vavgubr:
- di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
-
-// ALU64 / VB / Vector compare unsigned bytes.
-def HEXAGON_A2_vcmpbeq:
- qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
-def HEXAGON_A2_vcmpbgtu:
- qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
-
-// ALU64 / VB / Vector maximum/minimum unsigned bytes.
-def HEXAGON_A2_vmaxub:
- di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
-def HEXAGON_A2_vminub:
- di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
-
-// ALU64 / VB / Vector subtract unsigned bytes.
-def HEXAGON_A2_vsubub:
- di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
-def HEXAGON_A2_vsububs:
- di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
-// ALU64 / VB / Vector mux.
-def HEXAGON_C2_vmux:
- di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>;
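+
+// The floating-point classes follow the same scheme for F32/F64 values;
+// T_DI_pat and T_FI_pat additionally take an immediate predicate that
+// defaults to any i32 immediate. A minimal sketch (assuming the V5
+// instruction F2_sfadd and intrinsic int_hexagon_F2_sfadd defined
+// elsewhere in this backend):
+//   def : T_FF_pat <F2_sfadd, int_hexagon_F2_sfadd>;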
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords
+//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* ALU64/VH *
-*********************************************************************/
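+// For example, the first pattern below maps the intrinsic
+// int_hexagon_M2_mpy_ll_s1 onto M2_mpy_ll_s1, whose assembly form
+// (mirroring the si_MInst_* classes removed above) is, informally,
+//   Rd = mpy(Rs.L, Rt.L):<<1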
+def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
+def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
+def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
+def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
+def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
+def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
+def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
+def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
+
+def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
+def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
+def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
+def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
+def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
+def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
+def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
+def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
+
+def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-// ALU64 / VH / Vector add halfwords.
-// Rdd64=vadd[u]h(Rss64,Rtt64:sat]
-def HEXAGON_A2_vaddh:
- di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
-def HEXAGON_A2_vaddhs:
- di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
-def HEXAGON_A2_vadduhs:
- di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
-
-// ALU64 / VH / Vector average halfwords.
-// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat]
-def HEXAGON_A2_vavgh:
- di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
-def HEXAGON_A2_vavghcr:
- di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
-def HEXAGON_A2_vavghr:
- di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
-def HEXAGON_A2_vavguh:
- di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
-def HEXAGON_A2_vavguhr:
- di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
-def HEXAGON_A2_vnavgh:
- di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
-def HEXAGON_A2_vnavghcr:
- di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
-def HEXAGON_A2_vnavghr:
- di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
-
-// ALU64 / VH / Vector compare halfwords.
-def HEXAGON_A2_vcmpheq:
- qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
-def HEXAGON_A2_vcmphgt:
- qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
-def HEXAGON_A2_vcmphgtu:
- qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
-
-// ALU64 / VH / Vector maximum halfwords.
-def HEXAGON_A2_vmaxh:
- di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
-def HEXAGON_A2_vmaxuh:
- di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
-
-// ALU64 / VH / Vector minimum halfwords.
-def HEXAGON_A2_vminh:
- di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
-def HEXAGON_A2_vminuh:
- di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
-
-// ALU64 / VH / Vector subtract halfwords.
-def HEXAGON_A2_vsubh:
- di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
-def HEXAGON_A2_vsubhs:
- di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
-def HEXAGON_A2_vsubuhs:
- di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add the result to, or
+// subtract it from, the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* ALU64/VW *
-*********************************************************************/
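+// In these accumulating forms the intrinsic's first operand carries the
+// accumulator (the removed MInst_acc classes tied it to the destination
+// via the "$dst2 = $dst" constraint), so, informally,
+//   int_hexagon_M2_mpy_acc_ll_s0(rx, rs, rt)  computes  rx += mpy(rs.L, rt.L)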
+def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
+def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
+def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
-// ALU64 / VW / Vector add words.
-// Rdd32=vaddw(Rss32,Rtt32)[:sat]
-def HEXAGON_A2_vaddw:
- di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
-def HEXAGON_A2_vaddws:
- di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
-
-// ALU64 / VW / Vector average words.
-def HEXAGON_A2_vavguw:
- di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
-def HEXAGON_A2_vavguwr:
- di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
-def HEXAGON_A2_vavgw:
- di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
-def HEXAGON_A2_vavgwcr:
- di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
-def HEXAGON_A2_vavgwr:
- di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
-def HEXAGON_A2_vnavgw:
- di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
-def HEXAGON_A2_vnavgwcr:
- di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
-def HEXAGON_A2_vnavgwr:
- di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
-
-// ALU64 / VW / Vector compare words.
-def HEXAGON_A2_vcmpweq:
- qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
-def HEXAGON_A2_vcmpwgt:
- qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
-def HEXAGON_A2_vcmpwgtu:
- qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
-
-// ALU64 / VW / Vector maximum words.
-def HEXAGON_A2_vmaxw:
- di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
-def HEXAGON_A2_vmaxuw:
- di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
-
-// ALU64 / VW / Vector minimum words.
-def HEXAGON_A2_vminw:
- di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
-def HEXAGON_A2_vminuw:
- di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
-
-// ALU64 / VW / Vector subtract words.
-def HEXAGON_A2_vsubw:
- di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
-def HEXAGON_A2_vsubws:
- di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+//===----------------------------------------------------------------------===//
+// Multiply signed/unsigned halfwords, with and without saturation and
+// rounding, into a 64-bit destination register.
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* CR *
-*********************************************************************/
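+// These widening multiplies return an i64, so T_RR_pat selects an
+// instruction whose destination is a register pair; e.g. the first
+// pattern below gives, informally,
+//   Rdd = mpy(Rs.H, Rt.H)  for  int_hexagon_M2_mpyd_hh_s0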
+def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
+def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
+def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
+def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
+def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
+def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
+def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
+def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
+
+def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
+
+def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
+def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
+def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
+def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
+def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
+def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
+def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
+def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add the result to, or
+// subtract it from, the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
-// CR / Logical reductions on predicates.
-def HEXAGON_C2_all8:
- qi_SInst_qi <"all8", int_hexagon_C2_all8>;
-def HEXAGON_C2_any8:
- qi_SInst_qi <"any8", int_hexagon_C2_any8>;
-
-// CR / Logical operations on predicates.
-def HEXAGON_C2_pxfer_map:
- qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
-def HEXAGON_C2_and:
- qi_SInst_qiqi <"and", int_hexagon_C2_and>;
-def HEXAGON_C2_andn:
- qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
-def HEXAGON_C2_not:
- qi_SInst_qi <"not", int_hexagon_C2_not>;
-def HEXAGON_C2_or:
- qi_SInst_qiqi <"or", int_hexagon_C2_or>;
-def HEXAGON_C2_orn:
- qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
-def HEXAGON_C2_xor:
- qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
+def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
+def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
+def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
+def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
+def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
+def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
+def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
+def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
+def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
+def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
+def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
+def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
+def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
+def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
+def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
+def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
+
+// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
+def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
+def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
+def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
+def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
+def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
+def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
+
+def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
+def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
+def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
+def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
+
+// Vector reduce multiply halfwords
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
+def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
+
+//===----------------------------------------------------------------------===//
+// Vector multiply with accumulation
+//===----------------------------------------------------------------------===//
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
+def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
+def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
+def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
+def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
+def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
+def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
+def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
+def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
+def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
+def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
+def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
+def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
+def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
+def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
+def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
+def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
+def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
+def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
+
+//===----------------------------------------------------------------------===//
+// Add/Subtract halfword
+// Rd=add(Rt.L,Rs.[HL])[:sat]
+// Rd=sub(Rt.L,Rs.[HL])[:sat]
+// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<<16]
+// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<<16]
+//===----------------------------------------------------------------------===//
+
+//Rd=add(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
+def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
+
+//Rd=add(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
+def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
+def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
+def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
+def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
+def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
+def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
+def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
+def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
+def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
+def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
+def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
+def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
+def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
+def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
+def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
+
+// ALU64 / ALU / min max
+def : T_RR_pat<A2_max, int_hexagon_A2_max>;
+def : T_RR_pat<A2_min, int_hexagon_A2_min>;
+def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
+def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
+
+// Shift and accumulate
+def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
+def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
+def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
+def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
+def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
+
+def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
+def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
+def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
+def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
+def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
+def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
+
+def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
+def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
+def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
+def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
+def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
+def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
+
+def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
+def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
+def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
+def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
+def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
+def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
+
+def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
+def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
+def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
+def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
+def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
+def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
+def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
+def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
+
+def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
+def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
+def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
+def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
+def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
+def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
+def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
+def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
+
+def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
+def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
+def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
+def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
+def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
+def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
+def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
+def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
+
+def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
+def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
+def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
+def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
+def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
+def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
+def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
+def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
+
/********************************************************************
-* MTYPE/ALU *
+* ALU32/ALU *
*********************************************************************/
+def : T_RR_pat<A2_add, int_hexagon_A2_add>;
+def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
+def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
+def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
+def : T_RR_pat<A2_and, int_hexagon_A2_and>;
+def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
+def : T_RR_pat<A2_or, int_hexagon_A2_or>;
+def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
+def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
+def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+
+// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
+def : Pat <(int_hexagon_A2_not (I32:$Rs)),
+ (A2_subri -1, IntRegs:$Rs)>;
+
+// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32)
+def : Pat <(int_hexagon_A2_neg IntRegs:$Rs),
+ (A2_subri 0, IntRegs:$Rs)>;
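+// (Both mappings follow from two's complement arithmetic: not(Rs) = -1 - Rs
+// and neg(Rs) = 0 - Rs, so a single subtract-from-immediate covers both.)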
+
+// Transfer immediate
+def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>;
+def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>;
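+// A2_tfril/A2_tfrih overwrite only the low/high halfword of the destination,
+// which is why the intrinsic's first operand carries the register being
+// updated.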
+
+// Transfer Register/immediate.
+def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
+def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
+
+// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
+def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src),
+ (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>;
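+// HiReg/LoReg extract the two 32-bit halves of the register pair, so the
+// 64-bit copy is rematerialized as a combine of its own halves.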
-// MTYPE / ALU / Add and accumulate.
-def HEXAGON_M2_acci:
- si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
-def HEXAGON_M2_accii:
- si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
-def HEXAGON_M2_nacci:
- si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
-def HEXAGON_M2_naccii:
- si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+// Combine
+def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
+def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
+def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
+def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
+
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
-// MTYPE / ALU / Subtract and accumulate.
-def HEXAGON_M2_subacc:
- si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
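+// The intrinsic models the predicate operand as an i32, so it is first
+// transferred into a predicate register with C2_tfrrp before feeding the mux.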
-// MTYPE / ALU / Vector absolute difference.
-def HEXAGON_M2_vabsdiffh:
- di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
-def HEXAGON_M2_vabsdiffw:
- di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+// Mux
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
-// MTYPE / ALU / XOR and xor with destination.
-def HEXAGON_M2_xor_xacc:
- si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+// Shift halfword
+def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
+def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
+def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>;
+// Sign/zero extend
+def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
+def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
+def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
+def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
/********************************************************************
-* MTYPE/COMPLEX *
+* ALU32/PRED *
*********************************************************************/
+// Compare
+def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
+def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
+def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-// MTYPE / COMPLEX / Complex multiply.
-// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat
-def HEXAGON_M2_cmpys_s1:
- di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
-def HEXAGON_M2_cmpys_s0:
- di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
-def HEXAGON_M2_cmpysc_s1:
- di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
-def HEXAGON_M2_cmpysc_s0:
- di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
-
-def HEXAGON_M2_cmacs_s1:
- di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
-def HEXAGON_M2_cmacs_s0:
- di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
-def HEXAGON_M2_cmacsc_s1:
- di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
-def HEXAGON_M2_cmacsc_s0:
- di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
-
-def HEXAGON_M2_cnacs_s1:
- di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
-def HEXAGON_M2_cnacs_s0:
- di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
-def HEXAGON_M2_cnacsc_s1:
- di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
-def HEXAGON_M2_cnacsc_s0:
- di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
-
-// MTYPE / COMPLEX / Complex multiply real or imaginary.
-def HEXAGON_M2_cmpyr_s0:
- di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
-def HEXAGON_M2_cmacr_s0:
- di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
-
-def HEXAGON_M2_cmpyi_s0:
- di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
-def HEXAGON_M2_cmaci_s0:
- di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
-
-// MTYPE / COMPLEX / Complex multiply with round and pack.
-// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
-def HEXAGON_M2_cmpyrs_s0:
- si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
-def HEXAGON_M2_cmpyrs_s1:
- si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
-
-def HEXAGON_M2_cmpyrsc_s0:
- si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
-def HEXAGON_M2_cmpyrsc_s1:
- si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
-
-//MTYPE / COMPLEX / Vector complex multiply real or imaginary.
-def HEXAGON_M2_vcmpy_s0_sat_i:
- di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
-def HEXAGON_M2_vcmpy_s1_sat_i:
- di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
-
-def HEXAGON_M2_vcmpy_s0_sat_r:
- di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
-def HEXAGON_M2_vcmpy_s1_sat_r:
- di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
-
-def HEXAGON_M2_vcmac_s0_sat_i:
- di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
-def HEXAGON_M2_vcmac_s0_sat_r:
- di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
-
-//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def HEXAGON_M2_vrcmpyi_s0:
- di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
-def HEXAGON_M2_vrcmpyr_s0:
- di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
-
-def HEXAGON_M2_vrcmpyi_s0c:
- di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
-def HEXAGON_M2_vrcmpyr_s0c:
- di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
-
-def HEXAGON_M2_vrcmaci_s0:
- di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
-def HEXAGON_M2_vrcmacr_s0:
- di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
-
-def HEXAGON_M2_vrcmaci_s0c:
- di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
-def HEXAGON_M2_vrcmacr_s0c:
- di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
+ (i32 (C2_cmpgti (I32:$src1),
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
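+// cmp.ge(Rs,#s8) is equivalent to cmp.gt(Rs,#s8-1); DEC_CONST_SIGNED performs
+// the decrement on the immediate.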
-/********************************************************************
-* MTYPE/MPYH *
-*********************************************************************/
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
+ (i32 (C2_cmpgtui (I32:$src1),
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
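+// Likewise, cmp.geu(Rs,#u8) with #u8 > 0 is cmp.gtu(Rs,#u8-1).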
+
+// Pd=cmp.geu(Rs,#u8) is always true when #u8 == 0, so it maps to
+// Pd=cmp.eq(Rs,Rs).
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
+ (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>;
-// MTYPE / MPYH / Multiply and use lower result.
-//def HEXAGON_M2_mpysmi:
-//FIXME: Hexagon_M2_mpysmi should really by of the type si_MInst_sim9,
-// not si_MInst_sis9 - but for now, we will use s9.
-// def Hexagon_M2_mpysmi:
-// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
-def Hexagon_M2_mpysmi:
- si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>;
-def HEXAGON_M2_mpyi:
- si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
-def HEXAGON_M2_mpyui:
- si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
-def HEXAGON_M2_macsip:
- si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
-def HEXAGON_M2_maci:
- si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
-def HEXAGON_M2_macsin:
- si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
-
-// MTYPE / MPYH / Multiply word by half (32x16).
-//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
-//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
-def HEXAGON_M2_mmpyl_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
-def HEXAGON_M2_mmpyl_s1:
- di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
-def HEXAGON_M2_mmpyl_rs0:
- di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
-def HEXAGON_M2_mmpyl_s0:
- di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
-def HEXAGON_M2_mmpyh_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
-def HEXAGON_M2_mmpyh_s1:
- di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
-def HEXAGON_M2_mmpyh_rs0:
- di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
-def HEXAGON_M2_mmpyh_s0:
- di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
-def HEXAGON_M2_mmacls_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
-def HEXAGON_M2_mmacls_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
-def HEXAGON_M2_mmacls_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
-def HEXAGON_M2_mmacls_s0:
- di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
-def HEXAGON_M2_mmachs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
-def HEXAGON_M2_mmachs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
-def HEXAGON_M2_mmachs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
-def HEXAGON_M2_mmachs_s0:
- di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
-
-// MTYPE / MPYH / Multiply word by unsigned half (32x16).
-//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
-//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
-def HEXAGON_M2_mmpyul_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
-def HEXAGON_M2_mmpyul_s1:
- di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
-def HEXAGON_M2_mmpyul_rs0:
- di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
-def HEXAGON_M2_mmpyul_s0:
- di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
-def HEXAGON_M2_mmpyuh_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
-def HEXAGON_M2_mmpyuh_s1:
- di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
-def HEXAGON_M2_mmpyuh_rs0:
- di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
-def HEXAGON_M2_mmpyuh_s0:
- di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
-def HEXAGON_M2_mmaculs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
-def HEXAGON_M2_mmaculs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
-def HEXAGON_M2_mmaculs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
-def HEXAGON_M2_mmaculs_s0:
- di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
-def HEXAGON_M2_mmacuhs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
-def HEXAGON_M2_mmacuhs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
-def HEXAGON_M2_mmacuhs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
-def HEXAGON_M2_mmacuhs_s0:
- di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
-
-// MTYPE / MPYH / Multiply and use upper result.
-def HEXAGON_M2_hmmpyh_rs1:
- si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
-def HEXAGON_M2_hmmpyl_rs1:
- si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
-def HEXAGON_M2_mpy_up:
- si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
-def HEXAGON_M2_dpmpyss_rnd_s0:
- si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
-def HEXAGON_M2_mpyu_up:
- si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
-
-// MTYPE / MPYH / Multiply and use full result.
-def HEXAGON_M2_dpmpyuu_s0:
- di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
-def HEXAGON_M2_dpmpyuu_acc_s0:
- di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
-def HEXAGON_M2_dpmpyuu_nac_s0:
- di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
-def HEXAGON_M2_dpmpyss_s0:
- di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
-def HEXAGON_M2_dpmpyss_acc_s0:
- di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
-def HEXAGON_M2_dpmpyss_nac_s0:
- di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
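+// There are no cmp.lt/cmp.ltu instructions; the operands are swapped and
+// cmp.gt/cmp.gtu are used instead.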
+def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>;
+def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>;
/********************************************************************
-* MTYPE/MPYS *
+* ALU32/VH *
*********************************************************************/
+// Vector add, subtract, average halfwords
+def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
+def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
+def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
-// MTYPE / MPYS / Scalar 16x16 multiply signed.
-//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]|
-// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]]
-def HEXAGON_M2_mpy_hh_s0:
- si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
-def HEXAGON_M2_mpy_hh_s1:
- si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
-def HEXAGON_M2_mpy_rnd_hh_s1:
- si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
-def HEXAGON_M2_mpy_sat_rnd_hh_s1:
- si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def HEXAGON_M2_mpy_sat_hh_s1:
- si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
-def HEXAGON_M2_mpy_rnd_hh_s0:
- si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
-def HEXAGON_M2_mpy_sat_rnd_hh_s0:
- si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-def HEXAGON_M2_mpy_sat_hh_s0:
- si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_hl_s0:
- si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
-def HEXAGON_M2_mpy_hl_s1:
- si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
-def HEXAGON_M2_mpy_rnd_hl_s1:
- si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
-def HEXAGON_M2_mpy_sat_rnd_hl_s1:
- si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def HEXAGON_M2_mpy_sat_hl_s1:
- si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
-def HEXAGON_M2_mpy_rnd_hl_s0:
- si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
-def HEXAGON_M2_mpy_sat_rnd_hl_s0:
- si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def HEXAGON_M2_mpy_sat_hl_s0:
- si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_lh_s0:
- si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
-def HEXAGON_M2_mpy_lh_s1:
- si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
-def HEXAGON_M2_mpy_rnd_lh_s1:
- si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
-def HEXAGON_M2_mpy_sat_rnd_lh_s1:
- si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def HEXAGON_M2_mpy_sat_lh_s1:
- si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
-def HEXAGON_M2_mpy_rnd_lh_s0:
- si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
-def HEXAGON_M2_mpy_sat_rnd_lh_s0:
- si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def HEXAGON_M2_mpy_sat_lh_s0:
- si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_ll_s0:
- si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
-def HEXAGON_M2_mpy_ll_s1:
- si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
-def HEXAGON_M2_mpy_rnd_ll_s1:
- si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
-def HEXAGON_M2_mpy_sat_rnd_ll_s1:
- si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def HEXAGON_M2_mpy_sat_ll_s1:
- si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
-def HEXAGON_M2_mpy_rnd_ll_s0:
- si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
-def HEXAGON_M2_mpy_sat_rnd_ll_s0:
- si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def HEXAGON_M2_mpy_sat_ll_s0:
- si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
-
-//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
-def HEXAGON_M2_mpyd_hh_s0:
- di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
-def HEXAGON_M2_mpyd_hh_s1:
- di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
-def HEXAGON_M2_mpyd_rnd_hh_s1:
- di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
-def HEXAGON_M2_mpyd_rnd_hh_s0:
- di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
-
-def HEXAGON_M2_mpyd_hl_s0:
- di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
-def HEXAGON_M2_mpyd_hl_s1:
- di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
-def HEXAGON_M2_mpyd_rnd_hl_s1:
- di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
-def HEXAGON_M2_mpyd_rnd_hl_s0:
- di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
-
-def HEXAGON_M2_mpyd_lh_s0:
- di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
-def HEXAGON_M2_mpyd_lh_s1:
- di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
-def HEXAGON_M2_mpyd_rnd_lh_s1:
- di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
-def HEXAGON_M2_mpyd_rnd_lh_s0:
- di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
-
-def HEXAGON_M2_mpyd_ll_s0:
- di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
-def HEXAGON_M2_mpyd_ll_s1:
- di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
-def HEXAGON_M2_mpyd_rnd_ll_s1:
- di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
-def HEXAGON_M2_mpyd_rnd_ll_s0:
- di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
-
-//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def HEXAGON_M2_mpy_acc_hh_s0:
- si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
-def HEXAGON_M2_mpy_acc_hh_s1:
- si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
-def HEXAGON_M2_mpy_acc_sat_hh_s1:
- si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def HEXAGON_M2_mpy_acc_sat_hh_s0:
- si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_acc_hl_s0:
- si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
-def HEXAGON_M2_mpy_acc_hl_s1:
- si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
-def HEXAGON_M2_mpy_acc_sat_hl_s1:
- si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def HEXAGON_M2_mpy_acc_sat_hl_s0:
- si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_acc_lh_s0:
- si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
-def HEXAGON_M2_mpy_acc_lh_s1:
- si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
-def HEXAGON_M2_mpy_acc_sat_lh_s1:
- si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def HEXAGON_M2_mpy_acc_sat_lh_s0:
- si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_acc_ll_s0:
- si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
-def HEXAGON_M2_mpy_acc_ll_s1:
- si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
-def HEXAGON_M2_mpy_acc_sat_ll_s1:
- si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def HEXAGON_M2_mpy_acc_sat_ll_s0:
- si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
-
-//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def HEXAGON_M2_mpy_nac_hh_s0:
- si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
-def HEXAGON_M2_mpy_nac_hh_s1:
- si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
-def HEXAGON_M2_mpy_nac_sat_hh_s1:
- si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def HEXAGON_M2_mpy_nac_sat_hh_s0:
- si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_nac_hl_s0:
- si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
-def HEXAGON_M2_mpy_nac_hl_s1:
- si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
-def HEXAGON_M2_mpy_nac_sat_hl_s1:
- si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def HEXAGON_M2_mpy_nac_sat_hl_s0:
- si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_nac_lh_s0:
- si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
-def HEXAGON_M2_mpy_nac_lh_s1:
- si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
-def HEXAGON_M2_mpy_nac_sat_lh_s1:
- si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def HEXAGON_M2_mpy_nac_sat_lh_s0:
- si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_nac_ll_s0:
- si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
-def HEXAGON_M2_mpy_nac_ll_s1:
- si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
-def HEXAGON_M2_mpy_nac_sat_ll_s1:
- si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def HEXAGON_M2_mpy_nac_sat_ll_s0:
- si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
-
-//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
-def HEXAGON_M2_mpyd_acc_hh_s0:
- di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
-def HEXAGON_M2_mpyd_acc_hh_s1:
- di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
-
-def HEXAGON_M2_mpyd_acc_hl_s0:
- di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
-def HEXAGON_M2_mpyd_acc_hl_s1:
- di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
-
-def HEXAGON_M2_mpyd_acc_lh_s0:
- di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
-def HEXAGON_M2_mpyd_acc_lh_s1:
- di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
-
-def HEXAGON_M2_mpyd_acc_ll_s0:
- di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
-def HEXAGON_M2_mpyd_acc_ll_s1:
- di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
-
-//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
-def HEXAGON_M2_mpyd_nac_hh_s0:
- di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
-def HEXAGON_M2_mpyd_nac_hh_s1:
- di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
-
-def HEXAGON_M2_mpyd_nac_hl_s0:
- di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
-def HEXAGON_M2_mpyd_nac_hl_s1:
- di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
-
-def HEXAGON_M2_mpyd_nac_lh_s0:
- di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
-def HEXAGON_M2_mpyd_nac_lh_s1:
- di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
-
-def HEXAGON_M2_mpyd_nac_ll_s0:
- di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
-def HEXAGON_M2_mpyd_nac_ll_s1:
- di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
-
-// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
-//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_hh_s0:
- si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
-def HEXAGON_M2_mpyu_hh_s1:
- si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
-def HEXAGON_M2_mpyu_hl_s0:
- si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
-def HEXAGON_M2_mpyu_hl_s1:
- si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
-def HEXAGON_M2_mpyu_lh_s0:
- si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
-def HEXAGON_M2_mpyu_lh_s1:
- si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
-def HEXAGON_M2_mpyu_ll_s0:
- si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
-def HEXAGON_M2_mpyu_ll_s1:
- si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
-
-//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_hh_s0:
- di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
-def HEXAGON_M2_mpyud_hh_s1:
- di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
-def HEXAGON_M2_mpyud_hl_s0:
- di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
-def HEXAGON_M2_mpyud_hl_s1:
- di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
-def HEXAGON_M2_mpyud_lh_s0:
- di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
-def HEXAGON_M2_mpyud_lh_s1:
- di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
-def HEXAGON_M2_mpyud_ll_s0:
- di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
-def HEXAGON_M2_mpyud_ll_s1:
- di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
-
-//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_acc_hh_s0:
- si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
-def HEXAGON_M2_mpyu_acc_hh_s1:
- si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
-def HEXAGON_M2_mpyu_acc_hl_s0:
- si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
-def HEXAGON_M2_mpyu_acc_hl_s1:
- si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
-def HEXAGON_M2_mpyu_acc_lh_s0:
- si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
-def HEXAGON_M2_mpyu_acc_lh_s1:
- si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
-def HEXAGON_M2_mpyu_acc_ll_s0:
- si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
-def HEXAGON_M2_mpyu_acc_ll_s1:
- si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
-
-//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_nac_hh_s0:
- si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
-def HEXAGON_M2_mpyu_nac_hh_s1:
- si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
-def HEXAGON_M2_mpyu_nac_hl_s0:
- si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
-def HEXAGON_M2_mpyu_nac_hl_s1:
- si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
-def HEXAGON_M2_mpyu_nac_lh_s0:
- si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
-def HEXAGON_M2_mpyu_nac_lh_s1:
- si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
-def HEXAGON_M2_mpyu_nac_ll_s0:
- si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
-def HEXAGON_M2_mpyu_nac_ll_s1:
- si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
-
-//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_acc_hh_s0:
- di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
-def HEXAGON_M2_mpyud_acc_hh_s1:
- di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
-def HEXAGON_M2_mpyud_acc_hl_s0:
- di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
-def HEXAGON_M2_mpyud_acc_hl_s1:
- di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
-def HEXAGON_M2_mpyud_acc_lh_s0:
- di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
-def HEXAGON_M2_mpyud_acc_lh_s1:
- di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
-def HEXAGON_M2_mpyud_acc_ll_s0:
- di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
-def HEXAGON_M2_mpyud_acc_ll_s1:
- di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
-
-//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_nac_hh_s0:
- di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
-def HEXAGON_M2_mpyud_nac_hh_s1:
- di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
-def HEXAGON_M2_mpyud_nac_hl_s0:
- di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
-def HEXAGON_M2_mpyud_nac_hl_s1:
- di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
-def HEXAGON_M2_mpyud_nac_lh_s0:
- di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
-def HEXAGON_M2_mpyud_nac_lh_s1:
- di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
-def HEXAGON_M2_mpyud_nac_ll_s0:
- di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
-def HEXAGON_M2_mpyud_nac_ll_s1:
- di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
+def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
+def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
+def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
+def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
+def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
/********************************************************************
-* MTYPE/VB *
+* ALU64/ALU *
*********************************************************************/
+def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
+def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
+def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
+def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
+
+def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
+def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
+def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
-// MTYPE / VB / Vector reduce add unsigned bytes.
-def HEXAGON_A2_vraddub:
- di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
-def HEXAGON_A2_vraddub_acc:
- di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
+def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
+def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
-// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
-def HEXAGON_A2_vrsadub:
- di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
-def HEXAGON_A2_vrsadub_acc:
- di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
+def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
/********************************************************************
-* MTYPE/VH *
+* ALU64/VB *
*********************************************************************/
+// ALU64 - Vector add
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
+def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
+def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
+def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
+def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
+def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
+def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
+
+// ALU64 - Vector average
+def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
+def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
+def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
+def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
+def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
+def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
+def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
+
+def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
+def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
+def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
+def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
+def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
+
+// ALU64 - Vector negative average
+def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
+def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
+def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
+def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
+def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
+def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
+
+// ALU64 - Vector max
+def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
+def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
+def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
+def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
+def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
+
+// ALU64 - Vector min
+def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
+def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
+def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
+def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
+def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
+
+// ALU64 - Vector sub
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
+def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
+def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
+def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
+def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
+def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
+def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
+
+// ALU64 - Vector compare bytes
+def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
+def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
+def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 - Vector compare halfwords
+def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
+def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
+def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
+
+// ALU64 - Vector compare words
+def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
+def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
+def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
-// MTYPE / VH / Vector dual multiply.
-def HEXAGON_M2_vdmpys_s1:
- di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
-def HEXAGON_M2_vdmpys_s0:
- di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
-def HEXAGON_M2_vdmacs_s1:
- di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
-def HEXAGON_M2_vdmacs_s0:
- di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
-
-// MTYPE / VH / Vector dual multiply with round and pack.
-def HEXAGON_M2_vdmpyrs_s0:
- si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
-def HEXAGON_M2_vdmpyrs_s1:
- si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
-
-// MTYPE / VH / Vector multiply even halfwords.
-def HEXAGON_M2_vmpy2es_s1:
- di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
-def HEXAGON_M2_vmpy2es_s0:
- di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
-def HEXAGON_M2_vmac2es:
- di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
-def HEXAGON_M2_vmac2es_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
-def HEXAGON_M2_vmac2es_s0:
- di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
-
-// MTYPE / VH / Vector multiply halfwords.
-def HEXAGON_M2_vmpy2s_s0:
- di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
-def HEXAGON_M2_vmpy2s_s1:
- di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
-def HEXAGON_M2_vmac2:
- di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
-def HEXAGON_M2_vmac2s_s0:
- di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
-def HEXAGON_M2_vmac2s_s1:
- di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
-
-// MTYPE / VH / Vector multiply halfwords with round and pack.
-def HEXAGON_M2_vmpy2s_s0pack:
- si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
-def HEXAGON_M2_vmpy2s_s1pack:
- si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
-
-// MTYPE / VH / Vector reduce multiply halfwords.
-// Rxx32+=vrmpyh(Rss32,Rtt32)
-def HEXAGON_M2_vrmpy_s0:
- di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
-def HEXAGON_M2_vrmac_s0:
- di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
-
+// ALU64 / VB / Vector mux.
+def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs, Rt)
+def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
+def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
+
+// Complex multiply real or imaginary
+def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
+def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
+
+// Complex multiply
+def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
+def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
+def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
+def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
+def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
+
+// Rxx[+-]=mpy[u](Rs,Rt)
+def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
+def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
+def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
+def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
+def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
+def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
+def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
+def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
+def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
+def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
+def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
+def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
+def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
/********************************************************************
-* STYPE/ALU *
+* CR *
*********************************************************************/
+class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>;
+
+class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>;
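+// Predicate-valued intrinsics are modeled on i32: inputs are moved into
+// predicate registers with C2_tfrrp and the result is moved back out with
+// C2_tfrpr.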
+
+def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>;
+def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>;
+def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>;
+
+def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>;
+def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>;
+def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>;
+def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>;
+def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>;
+
+// Multiply 32x32 and use lower result
+def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
+def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
+def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
+
+// Subtract and accumulate
+def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
+
+// Add and accumulate
+def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
+def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
+def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
+def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
+
+// XOR and XOR with destination
+def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
+
+class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> :
+ Pat <(IntID IntRegs:$src1, IntRegs:$src2),
+ (OutputInst IntRegs:$src1, IntRegs:$src2)>;
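+// MType_R32_pat maps an intrinsic taking two 32-bit register operands
+// directly onto the corresponding instruction.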
-// STYPE / ALU / Absolute value.
-def HEXAGON_A2_abs:
- si_SInst_si <"abs", int_hexagon_A2_abs>;
-def HEXAGON_A2_absp:
- di_SInst_di <"abs", int_hexagon_A2_absp>;
-def HEXAGON_A2_abssat:
- si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+// Vector dual multiply with round and pack
-// STYPE / ALU / Negate.
-def HEXAGON_A2_negp:
- di_SInst_di <"neg", int_hexagon_A2_negp>;
-def HEXAGON_A2_negsat:
- si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>;
-// STYPE / ALU / Logical Not.
-def HEXAGON_A2_notp:
- di_SInst_di <"not", int_hexagon_A2_notp>;
+def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>;
-// STYPE / ALU / Sign extend word to doubleword.
-def HEXAGON_A2_sxtw:
- di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+// Vector multiply halfwords with round and pack
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>;
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>;
+
+// Multiply and use lower result
+def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>;
+def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>;
+
+// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
+def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>;
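+// (The low 32 bits of a 32x32 product are the same for signed and unsigned
+// multiplication, so one instruction serves both intrinsics.)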
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>;
+def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>;
+def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>;
+
+// Complex multiply with round and pack
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>;
/********************************************************************
-* STYPE/BIT *
+* STYPE/ALU *
*********************************************************************/
+def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
+def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
+def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
-// STYPE / BIT / Count leading.
-def HEXAGON_S2_cl0:
- si_SInst_si <"cl0", int_hexagon_S2_cl0>;
-def HEXAGON_S2_cl0p:
- si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
-def HEXAGON_S2_cl1:
- si_SInst_si <"cl1", int_hexagon_S2_cl1>;
-def HEXAGON_S2_cl1p:
- si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
-def HEXAGON_S2_clb:
- si_SInst_si <"clb", int_hexagon_S2_clb>;
-def HEXAGON_S2_clbp:
- si_SInst_di <"clb", int_hexagon_S2_clbp>;
-def HEXAGON_S2_clbnorm:
- si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
-
-// STYPE / BIT / Count trailing.
-def HEXAGON_S2_ct0:
- si_SInst_si <"ct0", int_hexagon_S2_ct0>;
-def HEXAGON_S2_ct1:
- si_SInst_si <"ct1", int_hexagon_S2_ct1>;
-
-// STYPE / BIT / Compare bit mask.
-def Hexagon_C2_bitsclr:
- qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
-def Hexagon_C2_bitsclri:
- qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
-def Hexagon_C2_bitsset:
- qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
-
-// STYPE / BIT / Extract unsigned.
-// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
-def HEXAGON_S2_extractu:
- si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
-def HEXAGON_S2_extractu_rp:
- si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
-def HEXAGON_S2_extractup:
- di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
-def HEXAGON_S2_extractup_rp:
- di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
-
-// STYPE / BIT / Insert bitfield.
-def Hexagon_S2_insert:
- si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
-def Hexagon_S2_insert_rp:
- si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
-def Hexagon_S2_insertp:
- di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
-def Hexagon_S2_insertp_rp:
- di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
-
-// STYPE / BIT / Innterleave/deinterleave.
-def Hexagon_S2_interleave:
- di_SInst_di <"interleave", int_hexagon_S2_interleave>;
-def Hexagon_S2_deinterleave:
- di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
-
-// STYPE / BIT / Linear feedback-shift Iteration.
-def Hexagon_S2_lfsp:
- di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
-
-// STYPE / BIT / Bit reverse.
-def Hexagon_S2_brev:
- si_SInst_si <"brev", int_hexagon_S2_brev>;
-
-// STYPE / BIT / Set/Clear/Toggle Bit.
-def HEXAGON_S2_setbit_i:
- si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
-def HEXAGON_S2_togglebit_i:
- si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
-def HEXAGON_S2_clrbit_i:
- si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
-def HEXAGON_S2_setbit_r:
- si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
-def HEXAGON_S2_togglebit_r:
- si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
-def HEXAGON_S2_clrbit_r:
- si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
-
-// STYPE / BIT / Test Bit.
-def HEXAGON_S2_tstbit_i:
- qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
-def HEXAGON_S2_tstbit_r:
- qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+// Count leading/trailing
+def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
+def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
+def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
+def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
+def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
+def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
+def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
+def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
+def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
+
+// Compare bit mask
+def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
+def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
+def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
+
+// Vector shuffle
+def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
+def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
+def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
+def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
+
+// Vector truncate
+def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
+def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
+
+// Linear feedback-shift iteration
+def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
+
+// Vector splice
+def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
+def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
+
+// Shift by immediate and add
+def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
+
+// Extract bitfield
+def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
+def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
+def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
+def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
+
+// Insert bitfield
+def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2,
+ DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1),
+ (I64:$src2), (I64:$src3))),
+ (i64 (S2_insertp_rp (I64:$src1), (I64:$src2),
+ (I64:$src3)))>;
+
+def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1),
+ (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)),
+ (i64 (S2_insertp (I64:$src1), (I64:$src2),
+ u6ImmPred:$src3, u6ImmPred:$src4))>;
+
+
+// Interleave/deinterleave
+def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
+def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
+
+// Set/Clear/Toggle Bit
+def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
+def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
+def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
+
+def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
+def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
+def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
+
+// Test Bit
+def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
+def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
/********************************************************************
* STYPE/COMPLEX *
*********************************************************************/
+// Vector Complex conjugate
+def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-// STYPE / COMPLEX / Vector Complex conjugate.
-def HEXAGON_A2_vconj:
- di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
-
-// STYPE / COMPLEX / Vector Complex rotate.
-def HEXAGON_S2_vcrotate:
- di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
-
+// Vector Complex rotate
+def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
/********************************************************************
* STYPE/PERM *
*********************************************************************/
-// STYPE / PERM / Saturate.
-def HEXAGON_A2_sat:
- si_SInst_di <"sat", int_hexagon_A2_sat>;
-def HEXAGON_A2_satb:
- si_SInst_si <"satb", int_hexagon_A2_satb>;
-def HEXAGON_A2_sath:
- si_SInst_si <"sath", int_hexagon_A2_sath>;
-def HEXAGON_A2_satub:
- si_SInst_si <"satub", int_hexagon_A2_satub>;
-def HEXAGON_A2_satuh:
- si_SInst_si <"satuh", int_hexagon_A2_satuh>;
-
-// STYPE / PERM / Swizzle bytes.
-def HEXAGON_A2_swiz:
- si_SInst_si <"swiz", int_hexagon_A2_swiz>;
-
-// STYPE / PERM / Vector align.
-// Need custom lowering
-def HEXAGON_S2_valignib:
- di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
-def HEXAGON_S2_valignrb:
- di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
-
-// STYPE / PERM / Vector round and pack.
-def HEXAGON_S2_vrndpackwh:
- si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
-def HEXAGON_S2_vrndpackwhs:
- si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
-
-// STYPE / PERM / Vector saturate and pack.
-def HEXAGON_S2_svsathb:
- si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
-def HEXAGON_S2_vsathb:
- si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
-def HEXAGON_S2_svsathub:
- si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
-def HEXAGON_S2_vsathub:
- si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
-def HEXAGON_S2_vsatwh:
- si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
-def HEXAGON_S2_vsatwuh:
- si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
-
-// STYPE / PERM / Vector saturate without pack.
-def HEXAGON_S2_vsathb_nopack:
- di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
-def HEXAGON_S2_vsathub_nopack:
- di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
-def HEXAGON_S2_vsatwh_nopack:
- di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
-def HEXAGON_S2_vsatwuh_nopack:
- di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
-
-// STYPE / PERM / Vector shuffle.
-def HEXAGON_S2_shuffeb:
- di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
-def HEXAGON_S2_shuffeh:
- di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
-def HEXAGON_S2_shuffob:
- di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
-def HEXAGON_S2_shuffoh:
- di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
-
-// STYPE / PERM / Vector splat bytes.
-def HEXAGON_S2_vsplatrb:
- si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
-
-// STYPE / PERM / Vector splat halfwords.
-def HEXAGON_S2_vsplatrh:
- di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
-
-// STYPE / PERM / Vector splice.
-def Hexagon_S2_vsplicerb:
- di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
-def Hexagon_S2_vspliceib:
- di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
-
-// STYPE / PERM / Sign extend.
-def HEXAGON_S2_vsxtbh:
- di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
-def HEXAGON_S2_vsxthw:
- di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
-
-// STYPE / PERM / Truncate.
-def HEXAGON_S2_vtrunehb:
- si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
-def HEXAGON_S2_vtrunohb:
- si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
-def HEXAGON_S2_vtrunewh:
- di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
-def HEXAGON_S2_vtrunowh:
- di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
-
-// STYPE / PERM / Zero extend.
-def HEXAGON_S2_vzxtbh:
- di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
-def HEXAGON_S2_vzxthw:
- di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
-
+// Vector saturate without pack
+def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
+def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
+def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
+def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
/********************************************************************
* STYPE/PRED *
*********************************************************************/
-// STYPE / PRED / Mask generate from predicate.
-def HEXAGON_C2_mask:
- di_SInst_qi <"mask", int_hexagon_C2_mask>;
-
-// STYPE / PRED / Predicate transfer.
-def HEXAGON_C2_tfrpr:
- si_SInst_qi <"", int_hexagon_C2_tfrpr>;
-def HEXAGON_C2_tfrrp:
- qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+// Predicate transfer
+def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
+def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
-// STYPE / PRED / Viterbi pack even and odd predicate bits.
-def HEXAGON_C2_vitpack:
- si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+// Mask generate from predicate
+def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))),
+ (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>;
+// Viterbi pack even and odd predicate bits
+def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)),
+ (C2_tfrrp (I32:$Rt))))>;
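[Editor's note] These patterns all bracket the machine instruction with C2_tfrrp/C2_tfrpr because the int_hexagon_C2_* intrinsics traffic in i32 values while the instructions themselves read and write predicate registers; note that both transfer intrinsics lower to the same round trip through a predicate register. A minimal C sketch of the user-facing side, assuming the toolchain exposes int_hexagon_* intrinsics as __builtin_HEXAGON_* builtins (the usual Clang convention; exact availability may vary):

  #include <stdint.h>

  int32_t pack_viterbi(int32_t ps, int32_t pt) {
    /* Each i32 operand is moved into a predicate register (C2_tfrrp),
       vitpack runs on predicates, and the result returns via C2_tfrpr. */
    return __builtin_HEXAGON_C2_vitpack(ps, pt);
  }

  int64_t predicate_mask(int32_t ps) {
    /* mask(Pt): expands each predicate bit to a 0x00/0xff byte. */
    return __builtin_HEXAGON_C2_mask(ps);
  }
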
/********************************************************************
* STYPE/SHIFT *
*********************************************************************/
-// STYPE / SHIFT / Shift by immediate.
-def HEXAGON_S2_asl_i_r:
- si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
-def HEXAGON_S2_asr_i_r:
- si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
-def HEXAGON_S2_lsr_i_r:
- si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
-def HEXAGON_S2_asl_i_p:
- di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
-def HEXAGON_S2_asr_i_p:
- di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
-def HEXAGON_S2_lsr_i_p:
- di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
-
-// STYPE / SHIFT / Shift by immediate and accumulate.
-def HEXAGON_S2_asl_i_r_acc:
- si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
-def HEXAGON_S2_asr_i_r_acc:
- si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
-def HEXAGON_S2_lsr_i_r_acc:
- si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
-def HEXAGON_S2_asl_i_r_nac:
- si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
-def HEXAGON_S2_asr_i_r_nac:
- si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
-def HEXAGON_S2_lsr_i_r_nac:
- si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
-def HEXAGON_S2_asl_i_p_acc:
- di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
-def HEXAGON_S2_asr_i_p_acc:
- di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
-def HEXAGON_S2_lsr_i_p_acc:
- di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
-def HEXAGON_S2_asl_i_p_nac:
- di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
-def HEXAGON_S2_asr_i_p_nac:
- di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
-def HEXAGON_S2_lsr_i_p_nac:
- di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
-
-// STYPE / SHIFT / Shift by immediate and add.
-def HEXAGON_S2_addasl_rrri:
- si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
-
-// STYPE / SHIFT / Shift by immediate and logical.
-def HEXAGON_S2_asl_i_r_and:
- si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
-def HEXAGON_S2_asr_i_r_and:
- si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
-def HEXAGON_S2_lsr_i_r_and:
- si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
-
-def HEXAGON_S2_asl_i_r_xacc:
- si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
-def HEXAGON_S2_lsr_i_r_xacc:
- si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
-
-def HEXAGON_S2_asl_i_r_or:
- si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
-def HEXAGON_S2_asr_i_r_or:
- si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
-def HEXAGON_S2_lsr_i_r_or:
- si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
-
-def HEXAGON_S2_asl_i_p_and:
- di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
-def HEXAGON_S2_asr_i_p_and:
- di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
-def HEXAGON_S2_lsr_i_p_and:
- di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
-
-def HEXAGON_S2_asl_i_p_xacc:
- di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
-def HEXAGON_S2_lsr_i_p_xacc:
- di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
-
-def HEXAGON_S2_asl_i_p_or:
- di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
-def HEXAGON_S2_asr_i_p_or:
- di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
-def HEXAGON_S2_lsr_i_p_or:
- di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
-
-// STYPE / SHIFT / Shift right by immediate with rounding.
-def HEXAGON_S2_asr_i_r_rnd:
- si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
-def HEXAGON_S2_asr_i_r_rnd_goodsyntax:
- si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
-
-// STYPE / SHIFT / Shift left by immediate with saturation.
-def HEXAGON_S2_asl_i_r_sat:
- si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
-
-// STYPE / SHIFT / Shift by register.
-def HEXAGON_S2_asl_r_r:
- si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
-def HEXAGON_S2_asr_r_r:
- si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
-def HEXAGON_S2_lsl_r_r:
- si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
-def HEXAGON_S2_lsr_r_r:
- si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
-def HEXAGON_S2_asl_r_p:
- di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
-def HEXAGON_S2_asr_r_p:
- di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
-def HEXAGON_S2_lsl_r_p:
- di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
-def HEXAGON_S2_lsr_r_p:
- di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
-
-// STYPE / SHIFT / Shift by register and accumulate.
-def HEXAGON_S2_asl_r_r_acc:
- si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
-def HEXAGON_S2_asr_r_r_acc:
- si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
-def HEXAGON_S2_lsl_r_r_acc:
- si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
-def HEXAGON_S2_lsr_r_r_acc:
- si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
-def HEXAGON_S2_asl_r_p_acc:
- di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
-def HEXAGON_S2_asr_r_p_acc:
- di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
-def HEXAGON_S2_lsl_r_p_acc:
- di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
-def HEXAGON_S2_lsr_r_p_acc:
- di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
-
-def HEXAGON_S2_asl_r_r_nac:
- si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
-def HEXAGON_S2_asr_r_r_nac:
- si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
-def HEXAGON_S2_lsl_r_r_nac:
- si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
-def HEXAGON_S2_lsr_r_r_nac:
- si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
-def HEXAGON_S2_asl_r_p_nac:
- di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
-def HEXAGON_S2_asr_r_p_nac:
- di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
-def HEXAGON_S2_lsl_r_p_nac:
- di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
-def HEXAGON_S2_lsr_r_p_nac:
- di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
-
-// STYPE / SHIFT / Shift by register and logical.
-def HEXAGON_S2_asl_r_r_and:
- si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
-def HEXAGON_S2_asr_r_r_and:
- si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
-def HEXAGON_S2_lsl_r_r_and:
- si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
-def HEXAGON_S2_lsr_r_r_and:
- si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
-
-def HEXAGON_S2_asl_r_r_or:
- si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
-def HEXAGON_S2_asr_r_r_or:
- si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
-def HEXAGON_S2_lsl_r_r_or:
- si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
-def HEXAGON_S2_lsr_r_r_or:
- si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
-
-def HEXAGON_S2_asl_r_p_and:
- di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
-def HEXAGON_S2_asr_r_p_and:
- di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
-def HEXAGON_S2_lsl_r_p_and:
- di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
-def HEXAGON_S2_lsr_r_p_and:
- di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
-
-def HEXAGON_S2_asl_r_p_or:
- di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
-def HEXAGON_S2_asr_r_p_or:
- di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
-def HEXAGON_S2_lsl_r_p_or:
- di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
-def HEXAGON_S2_lsr_r_p_or:
- di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
-
-// STYPE / SHIFT / Shift by register with saturation.
-def HEXAGON_S2_asl_r_r_sat:
- si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
-def HEXAGON_S2_asr_r_r_sat:
- si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
-
-// STYPE / SHIFT / Table Index.
-def Hexagon_S2_tableidxb_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
-def Hexagon_S2_tableidxd_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
-def Hexagon_S2_tableidxh_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
-def Hexagon_S2_tableidxw_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
+def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
+def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
+
+def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
+def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
+def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
+def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
+
+def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
+def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
+def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
+def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
+
+def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
+def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
+
+def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
+def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
+def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
+def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
+def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
+def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
+
+// Vector saturate and pack
+def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
+def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
+def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
+def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
+def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
+def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
+
+def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
+def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
+def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
+def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
+def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
+def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
+
+def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
+def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
+def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
+
+def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
+
+def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
+def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
+def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
+def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
+def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
+
+// Vector arithmetic shift right by immediate with truncate and pack.
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
+
+def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
+def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
+def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
+def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
+def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// Shift left by immediate with saturation.
+def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
+//===----------------------------------------------------------------------===//
+// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
+//===----------------------------------------------------------------------===//
+class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
+ SDNodeXForm XformImm>
+ : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
+ (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
+ (XformImm u5ImmPred:$src4))>;
+
+
+// Table Index : Extract and insert bits.
+// Map to the real hardware instructions after subtracting appropriate
+// values from the 4th input operand. Please note that subtraction is not
+// needed for int_hexagon_S2_tableidxb_goodsyntax.
+
+def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4),
+ (S2_tableidxb IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
+ DEC_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
+ DEC2_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
+ DEC3_CONST_SIGNED>;
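[Editor's note] The goodsyntax forms above take the immediate as written in assembly, while the raw S2_tableidx[hwd] encodings expect the fourth operand reduced by 1, 2, or 3 respectively, which is exactly what the DEC/DEC2/DEC3_CONST_SIGNED transforms subtract (the byte form needs no adjustment). A hedged C illustration, assuming __builtin_HEXAGON_S2_tableidx*_goodsyntax builtins with the (Rx, Rs, #u4, #u5) operand order implied by the patterns:

  #include <stdint.h>

  int32_t table_index_half(int32_t rx, int32_t rs) {
    /* For the halfword form the backend emits S2_tableidxh with the
       fourth operand decremented by 1 (DEC_CONST_SIGNED); the word and
       doubleword forms subtract 2 and 3, the byte form nothing. */
    return __builtin_HEXAGON_S2_tableidxh_goodsyntax(rx, rs, 4, 6);
  }
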
/********************************************************************
* STYPE/VH *
*********************************************************************/
-// STYPE / VH / Vector absolute value halfwords.
-// Rdd64=vabsh(Rss64)
-def HEXAGON_A2_vabsh:
- di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
-def HEXAGON_A2_vabshsat:
- di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
-
-// STYPE / VH / Vector shift halfwords by immediate.
-// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vh:
- di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
-def HEXAGON_S2_asr_i_vh:
- di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
-def HEXAGON_S2_lsr_i_vh:
- di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
-
-// STYPE / VH / Vector shift halfwords by register.
-// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vh:
- di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
-def HEXAGON_S2_asr_r_vh:
- di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
-def HEXAGON_S2_lsl_r_vh:
- di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
-def HEXAGON_S2_lsr_r_vh:
- di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
+def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
+// Vector shift halfwords by immediate
+// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
+def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
+def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
+def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
+
+// Vector shift halfwords by register
+// Rdd64=[vaslh|vasrh|vlslh|vlsrh](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
+def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
+def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
+def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
/********************************************************************
* STYPE/VW *
*********************************************************************/
-// STYPE / VW / Vector absolute value words.
-def HEXAGON_A2_vabsw:
- di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
-def HEXAGON_A2_vabswsat:
- di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
-
-// STYPE / VW / Vector shift words by immediate.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vw:
- di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
-def HEXAGON_S2_asr_i_vw:
- di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
-def HEXAGON_S2_lsr_i_vw:
- di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
-
-// STYPE / VW / Vector shift words by register.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vw:
- di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
-def HEXAGON_S2_asr_r_vw:
- di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
-def HEXAGON_S2_lsl_r_vw:
- di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
-def HEXAGON_S2_lsr_r_vw:
- di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
-
-// STYPE / VW / Vector shift words with truncate and pack.
-def HEXAGON_S2_asr_r_svw_trun:
- si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
-def HEXAGON_S2_asr_i_svw_trun:
- si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
-
-// LD / Circular loads.
-def HEXAGON_circ_ldd:
- di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+// Vector absolute value words with and without saturation
+def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
+def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
+
+// Vector shift words by immediate.
+// Rdd64=[vasrw|vlsrw|vaslw](Rss64,u5)
+def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
+def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
+def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
+
+// Vector shift words by register.
+// Rdd64=[vasrw|vlsrw|vaslw|vlslw](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
+def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
+def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
+def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
+
+// Vector shift words with truncate and pack
+
+def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
+
+def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
+def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
+
+def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>;
+def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
+ (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
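[Editor's note] These four patterns are Hexagon's load-locked/store-conditional pair; the store patterns wrap the instruction in C2_tfrpr because the locked store produces a predicate reporting whether the reservation survived, handed back to the intrinsic caller as an i32. A sketch of the customary retry loop, assuming __builtin_HEXAGON_* spellings and a pointer-plus-value store signature (both are assumptions; check the toolchain headers):

  #include <stdint.h>

  void atomic_add_word(int32_t *p, int32_t v) {
    int32_t old;
    do {
      old = __builtin_HEXAGON_L2_loadw_locked(p);   /* memw_locked(p) */
      /* storew_locked returns the success predicate as i32: zero means
         another store intervened and the loop must retry. */
    } while (__builtin_HEXAGON_S2_storew_locked(p, old + v) == 0);
  }
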
+
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
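[Editor's note] T_stb_pat and T_stc_pat take the stored value's type as a PatLeaf parameter, so one class covers the byte through doubleword variants (I32 for most, I64 for the doubleword stores); the circular forms additionally constrain the increment to a signed 4-bit field scaled by the element size, which is what the s4_0 through s4_3 predicates encode. A small self-contained C check of that immediate legality rule (fits_s4_n is a hypothetical helper, written only to illustrate the encoding):

  #include <stdbool.h>
  #include <stdint.h>

  /* s4_N: a signed 4-bit value shifted left by N (N = log2 element size). */
  static bool fits_s4_n(int32_t imm, unsigned n) {
    if (imm & ((1 << n) - 1))      /* must be a multiple of 1 << n */
      return false;
    int32_t v = imm >> n;          /* the unscaled 4-bit signed field */
    return v >= -8 && v <= 7;
  }

  /* fits_s4_n(-16, 1) is true: a legal halfword circular increment;
     fits_s4_n(15, 1) is false: odd, hence not representable. */
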
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index df89378603a4..4c28b28337f4 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -14,8 +14,8 @@
def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(i64
(A2_combinew
- (HEXAGON_M2_maci
- (HEXAGON_M2_maci
+ (M2_maci
+ (M2_maci
(i32
(EXTRACT_SUBREG
(i64
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
index 2a54e62d20ae..6152cb098825 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -11,40 +11,17 @@
//
//===----------------------------------------------------------------------===//
-
-
-
-// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def Hexagon_M2_vrcmpys_s1:
- di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
-def Hexagon_M2_vrcmpys_acc_s1:
- di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
-def Hexagon_M2_vrcmpys_s1rp:
- si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
-
-
-
-
-/********************************************************************
-* MTYPE/VB *
-*********************************************************************/
-
-// MTYPE / VB / Vector reduce add unsigned bytes.
-def Hexagon_M2_vradduh:
- si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
-
-
-/********************************************************************
-* ALU64/ALU *
-*********************************************************************/
-
-// ALU64 / ALU / Add.
-def Hexagon_A2_addsp:
- di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
-def Hexagon_A2_addpsat:
- di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
-
-def Hexagon_A2_maxp:
- di_ALU64_didi <"max", int_hexagon_A2_maxp>;
-def Hexagon_A2_maxup:
- di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
+// Vector reduce complex multiply real or imaginary
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 77b148b9f2bd..c80a188d82e7 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -12,359 +12,307 @@
// 80-V9418-12 Rev. A
// June 15, 2010
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
+
+// Vector reduce add halfwords
+def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>;
+
+def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>;
+def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+
+class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> :
+ Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2),
+ (MI (i64 DoubleRegs:$src1), immPred:$src2)>;
+
+def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>;
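[Editor's note] The vcmpImm_pat instantiations pick a different immediate predicate per comparison: u8 for byte equality, s8 for the signed greater-thans and the halfword/word equalities, and u7 for the unsigned greater-thans, mirroring the instruction encodings. A hedged usage sketch, under the same __builtin_HEXAGON_* naming assumption as above:

  #include <stdint.h>

  int32_t bytes_gtu_9(int64_t xx) {
    /* vcmpb.gtu(Rss, #u7): one predicate bit per byte lane, returned
       to C as an i32. */
    return __builtin_HEXAGON_A4_vcmpbgtui(xx, 9);
  }
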
+
+def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>;
+
+def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+// Multiply 32x32 and use upper result
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-//
-// ALU 32 types.
-//
-
-class si_ALU32_sisi_not<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_ALU32_s8si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-class di_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-
-//
-// SInst Classes.
-//
-class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, !$src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, $src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, or($src2, !$src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, or($src2, $src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_si_addsis6<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, add($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_SInst_si_subs6si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, sub(#$src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
- IntRegs:$src3))]>;
-
-class di_ALU64_didi_neg<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_and<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, #$src3))")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_sisis10_or<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_MInst_sisisi_or<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_siu5_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
/********************************************************************
* ALU32/ALU *
*********************************************************************/
// ALU32 / ALU / Logical Operations.
-def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
-def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
-
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
/********************************************************************
* ALU32/PERM *
*********************************************************************/
-// ALU32 / PERM / Combine Words Into Doublewords.
-def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
-def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
-
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
/********************************************************************
* ALU32/PRED *
*********************************************************************/
-// ALU32 / PRED / Conditional Shift Halfword.
-// ALU32 / PRED / Conditional Sign Extend.
-// ALU32 / PRED / Conditional Zero Extend.
-// ALU32 / PRED / Compare.
-def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
-def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
-def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+// Compare
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
-def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>;
-def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>;
-def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>;
-
-// ALU32 / PRED / cmpare To General Register.
-def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
-def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
-def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
-def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
/********************************************************************
* CR *
*********************************************************************/
-// CR / Corner Detection Acceleration.
-def Hexagon_C4_fastcorner9:
- qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
-def Hexagon_C4_fastcorner9_not:
- qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
-
// CR / Logical Operations On Predicates.
-def Hexagon_C4_and_andn:
- qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
-def Hexagon_C4_and_and:
- qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
-def Hexagon_C4_and_orn:
- qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
-def Hexagon_C4_and_or:
- qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
-def Hexagon_C4_or_andn:
- qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
-def Hexagon_C4_or_and:
- qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
-def Hexagon_C4_or_orn:
- qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
-def Hexagon_C4_or_or:
- qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),
+ (C2_tfrrp IntRegs:$Rt),
+ (C2_tfrrp IntRegs:$Ru))))>;
+
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>;
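[Editor's note] qi_CRInst_qiqiqi_pat applies the same i32-to-predicate bracketing seen in the C2 transfer patterns, now for three-input predicate logic: all three i32 operands pass through C2_tfrrp and the result comes back through C2_tfrpr. Sketch, with the same builtin-naming assumption as the earlier examples:

  #include <stdint.h>

  int32_t and_with_andn(int32_t ps, int32_t pt, int32_t pu) {
    /* C4_and_andn: and(Ps, and(Pt, !Pu)), per the removed asm template
       above, computed in predicate registers after the three transfers. */
    return __builtin_HEXAGON_C4_and_andn(ps, pt, pu);
  }
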
/********************************************************************
* XTYPE/ALU *
*********************************************************************/
-// XTYPE / ALU / Add And Accumulate.
-def Hexagon_S4_addaddi:
- si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
-def Hexagon_S4_subaddi:
- si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+// Add And Accumulate.
-// XTYPE / ALU / Logical Doublewords.
-def Hexagon_S4_andnp:
- di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
-def Hexagon_S4_ornp:
- di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-// XTYPE / ALU / Logical-logical Doublewords.
-def Hexagon_M4_xor_xacc:
- di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
// XTYPE / ALU / Logical-logical Words.
-def HEXAGON_M4_and_and:
- si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
-def HEXAGON_M4_and_or:
- si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
-def HEXAGON_M4_and_xor:
- si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
-def HEXAGON_M4_and_andn:
- si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
-def HEXAGON_M4_xor_and:
- si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
-def HEXAGON_M4_xor_or:
- si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
-def HEXAGON_M4_xor_andn:
- si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
-def HEXAGON_M4_or_and:
- si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
-def HEXAGON_M4_or_or:
- si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
-def HEXAGON_M4_or_xor:
- si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
-def HEXAGON_M4_or_andn:
- si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
-def HEXAGON_S4_or_andix:
- si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
-def HEXAGON_S4_or_andi:
- si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
-def HEXAGON_S4_or_ori:
- si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
-
-// XTYPE / ALU / Modulo wrap.
-def HEXAGON_A4_modwrapu:
- si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
-
-// XTYPE / ALU / Round.
-def HEXAGON_A4_cround_ri:
- si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
-def HEXAGON_A4_cround_rr:
- si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
-def HEXAGON_A4_round_ri:
- si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
-def HEXAGON_A4_round_rr:
- si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
-def HEXAGON_A4_round_ri_sat:
- si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
-def HEXAGON_A4_round_rr_sat:
- si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
-
-// XTYPE / ALU / Vector reduce add unsigned halfwords.
-// XTYPE / ALU / Vector add bytes.
-// XTYPE / ALU / Vector conditional negate.
-// XTYPE / ALU / Vector maximum bytes.
-// XTYPE / ALU / Vector reduce maximum halfwords.
-// XTYPE / ALU / Vector reduce maximum words.
-// XTYPE / ALU / Vector minimum bytes.
-// XTYPE / ALU / Vector reduce minimum halfwords.
-// XTYPE / ALU / Vector reduce minimum words.
-// XTYPE / ALU / Vector subtract bytes.
-
-
-/********************************************************************
-* XTYPE/BIT *
-*********************************************************************/
-
-// XTYPE / BIT / Count leading.
-// XTYPE / BIT / Count trailing.
-// XTYPE / BIT / Extract bitfield.
-// XTYPE / BIT / Masked parity.
-// XTYPE / BIT / Bit reverse.
-// XTYPE / BIT / Split bitfield.
-
-
-/********************************************************************
-* XTYPE/COMPLEX *
-*********************************************************************/
-
-// XTYPE / COMPLEX / Complex add/sub halfwords.
-// XTYPE / COMPLEX / Complex add/sub words.
-// XTYPE / COMPLEX / Complex multiply 32x16.
-// XTYPE / COMPLEX / Vector reduce complex rotate.
-
-
-/********************************************************************
-* XTYPE/MPY *
-*********************************************************************/
-
-// XTYPE / COMPLEX / Complex add/sub halfwords.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index 1d44b526d298..60e6b1eb4479 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -1,395 +1,111 @@
-class sf_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class si_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class sf_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class sf_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class sf_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class si_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class df_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class di_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class df_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class df_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class di_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-
-class df_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class sf_MInst_sfsf<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class df_MInst_dfdf<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class qi_ALU64_dfdf<string opc, Intrinsic IntID>
- : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class qi_ALU64_dfu5<string opc, Intrinsic IntID>
- : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-
-class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-
-class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2, $src3):scale")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-
-class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2, IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2, $src3):scale")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2, IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class qi_SInst_sfsf<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_sfu5<string opc, Intrinsic IntID>
- : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class sf_ALU64_u10_pos<string opc, Intrinsic IntID>
- : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class sf_ALU64_u10_neg<string opc, Intrinsic IntID>
- : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class df_ALU64_u10_pos<string opc, Intrinsic IntID>
- : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class df_ALU64_u10_neg<string opc, Intrinsic IntID>
- : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class di_MInst_diu6<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class di_MInst_diu4_rnd<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_diu4_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-
-def HEXAGON_C4_fastcorner9:
- qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>;
-def HEXAGON_C4_fastcorner9_not:
- qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>;
-def HEXAGON_M5_vrmpybuu:
- di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>;
-def HEXAGON_M5_vrmacbuu:
- di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>;
-def HEXAGON_M5_vrmpybsu:
- di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>;
-def HEXAGON_M5_vrmacbsu:
- di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>;
-def HEXAGON_M5_vmpybuu:
- di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>;
-def HEXAGON_M5_vmpybsu:
- di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>;
-def HEXAGON_M5_vmacbuu:
- di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>;
-def HEXAGON_M5_vmacbsu:
- di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>;
-def HEXAGON_M5_vdmpybsu:
- di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>;
-def HEXAGON_M5_vdmacbsu:
- di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>;
-def HEXAGON_A5_vaddhubs:
- si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>;
-def HEXAGON_S5_popcountp:
- si_SInst_di <"popcount", int_hexagon_S5_popcountp>;
-def HEXAGON_S5_asrhub_rnd_sat_goodsyntax:
- si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
-def HEXAGON_S5_asrhub_sat:
- si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>;
-def HEXAGON_S5_vasrhrnd_goodsyntax:
- di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>;
-def HEXAGON_S2_asr_i_p_rnd:
- di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>;
-def HEXAGON_S2_asr_i_p_rnd_goodsyntax:
- di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
-def HEXAGON_F2_sfadd:
- sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>;
-def HEXAGON_F2_sfsub:
- sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>;
-def HEXAGON_F2_sfmpy:
- sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>;
-def HEXAGON_F2_sffma:
- sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>;
-def HEXAGON_F2_sffma_sc:
- sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>;
-def HEXAGON_F2_sffms:
- sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>;
-def HEXAGON_F2_sffma_lib:
- sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>;
-def HEXAGON_F2_sffms_lib:
- sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>;
-def HEXAGON_F2_sfcmpeq:
- qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>;
-def HEXAGON_F2_sfcmpgt:
- qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>;
-def HEXAGON_F2_sfcmpge:
- qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>;
-def HEXAGON_F2_sfcmpuo:
- qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>;
-def HEXAGON_F2_sfmax:
- sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>;
-def HEXAGON_F2_sfmin:
- sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>;
-def HEXAGON_F2_sfclass:
- qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>;
-def HEXAGON_F2_sfimm_p:
- sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>;
-def HEXAGON_F2_sfimm_n:
- sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>;
-def HEXAGON_F2_sffixupn:
- sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>;
-def HEXAGON_F2_sffixupd:
- sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>;
-def HEXAGON_F2_sffixupr:
- sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>;
-def HEXAGON_F2_dfadd:
- df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>;
-def HEXAGON_F2_dfsub:
- df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>;
-def HEXAGON_F2_dfmpy:
- df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>;
-def HEXAGON_F2_dffma:
- df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>;
-def HEXAGON_F2_dffms:
- df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>;
-def HEXAGON_F2_dffma_lib:
- df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>;
-def HEXAGON_F2_dffms_lib:
- df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>;
-def HEXAGON_F2_dffma_sc:
- df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>;
-def HEXAGON_F2_dfmax:
- df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>;
-def HEXAGON_F2_dfmin:
- df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>;
-def HEXAGON_F2_dfcmpeq:
- qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>;
-def HEXAGON_F2_dfcmpgt:
- qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>;
-def HEXAGON_F2_dfcmpge:
- qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>;
-def HEXAGON_F2_dfcmpuo:
- qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>;
-def HEXAGON_F2_dfclass:
- qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>;
-def HEXAGON_F2_dfimm_p:
- df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>;
-def HEXAGON_F2_dfimm_n:
- df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>;
-def HEXAGON_F2_dffixupn:
- df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>;
-def HEXAGON_F2_dffixupd:
- df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>;
-def HEXAGON_F2_dffixupr:
- df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>;
-def HEXAGON_F2_conv_sf2df:
- df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>;
-def HEXAGON_F2_conv_df2sf:
- sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>;
-def HEXAGON_F2_conv_uw2sf:
- sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>;
-def HEXAGON_F2_conv_uw2df:
- df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>;
-def HEXAGON_F2_conv_w2sf:
- sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>;
-def HEXAGON_F2_conv_w2df:
- df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>;
-def HEXAGON_F2_conv_ud2sf:
- sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>;
-def HEXAGON_F2_conv_ud2df:
- df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>;
-def HEXAGON_F2_conv_d2sf:
- sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>;
-def HEXAGON_F2_conv_d2df:
- df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>;
-def HEXAGON_F2_conv_sf2uw:
- si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>;
-def HEXAGON_F2_conv_sf2w:
- si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>;
-def HEXAGON_F2_conv_sf2ud:
- di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>;
-def HEXAGON_F2_conv_sf2d:
- di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>;
-def HEXAGON_F2_conv_df2uw:
- si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>;
-def HEXAGON_F2_conv_df2w:
- si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>;
-def HEXAGON_F2_conv_df2ud:
- di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>;
-def HEXAGON_F2_conv_df2d:
- di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>;
-def HEXAGON_F2_conv_sf2uw_chop:
- si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>;
-def HEXAGON_F2_conv_sf2w_chop:
- si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>;
-def HEXAGON_F2_conv_sf2ud_chop:
- di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>;
-def HEXAGON_F2_conv_sf2d_chop:
- di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>;
-def HEXAGON_F2_conv_df2uw_chop:
- si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>;
-def HEXAGON_F2_conv_df2w_chop:
- si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>;
-def HEXAGON_F2_conv_df2ud_chop:
- di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>;
-def HEXAGON_F2_conv_df2d_chop:
- di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>;
+//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Rdd[+]=vrmpybsu(Rss,Rtt)
+// Rdd[+]=vrmpybuu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
+def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
+
+def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>;
+
+def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>;
+def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>;
+// Rxx+=vdmpybsu(Rss,Rtt):sat
+def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>;
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>;
+def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>;
+
+// Rxx+=vmpyb[s]u(Rs,Rt)
+def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>;
+def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
+
+// Rd=vaddhub(Rss,Rtt):sat
+def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
+}
+
+def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
+def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
+def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>;
+def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>;
+def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>;
+
+def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>;
+def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>;
+def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>;
+
+def : qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>;
+def : qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>;
+
+def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>;
+def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>;
+
+def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>;
+def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+
+def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax,
+ int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+
+def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>;
+
+def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>;
+def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>;
+def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>;
+def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>;
+def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>;
+
+// Compare floating-point value
+def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>;
+def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>;
+def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>;
+def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>;
+
+def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>;
+def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>;
+def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>;
+def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>;
+
+// Create floating-point value
+def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>;
+def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>;
+def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>;
+def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>;
+
+def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>;
+def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>;
+def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>;
+def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>;
+def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>;
+def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>;
+def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>;
+def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>;
+def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>;
+def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>;
+def : T_P_pat <F2_conv_d2sf, int_hexagon_F2_conv_d2sf>;
+def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>;
+def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>;
+def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>;
+def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>;
+def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>;
+def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>;
+def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>;
+def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>;
+def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>;
+def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>;
+def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>;
+def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>;
+def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>;
+def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>;
+def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>;
+def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>;
+def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>;
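Each T_*_pat def above binds one int_hexagon_* intrinsic to the machine instruction that implements it; the pattern classes themselves (T_FF_pat, T_PP_pat and friends) live in the shared intrinsics .td files, not in this one. As a hedged sketch of how these mappings reach user code, assuming clang's usual __builtin_HEXAGON_<name> spelling and a float prototype for F2_sfadd:

    // Illustrative only: compiled for Hexagon V5, this builtin becomes the
    // int_hexagon_F2_sfadd intrinsic, which the T_FF_pat above selects to
    // the F2_sfadd instruction (Rd = sfadd(Rs, Rt)).
    float sfadd_demo(float a, float b) {
      return __builtin_HEXAGON_F2_sfadd(a, b);
    }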
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 3f5d6d84a108..535d1f91b493 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,7 +15,6 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
@@ -35,11 +34,11 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
MC);
- return (MCOperand::CreateExpr(ME));
+ return (MCOperand::createExpr(ME));
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
+void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
@@ -54,20 +53,20 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
- MCO = MCOperand::CreateReg(MO.getReg());
+ MCO = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_FPImmediate: {
APFloat Val = MO.getFPImm()->getValueAPF();
// FP immediates are used only when setting GPRs, so they may be dealt
// with like regular immediates from this point on.
- MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData());
+ MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData());
break;
}
case MachineOperand::MO_Immediate:
- MCO = MCOperand::CreateImm(MO.getImm());
+ MCO = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCO = MCOperand::CreateExpr
+ MCO = MCOperand::createExpr
(MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
AP.OutContext));
break;
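Besides dropping the HexagonMCInst dependency, this file mostly tracks the LLVM-wide rename of the MCOperand factories (CreateReg/CreateImm/CreateExpr became createReg/createImm/createExpr). A minimal sketch of the lowering loop's shape, with the remaining operand kinds elided:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCInst.h"

    // Sketch: mirrors the register and immediate cases of the switch above.
    void lowerOperands(const llvm::MachineInstr *MI, llvm::MCInst &MCI) {
      MCI.setOpcode(MI->getOpcode());
      for (const llvm::MachineOperand &MO : MI->operands()) {
        if (MO.isReg()) {
          if (MO.isImplicit())
            continue;  // implicit register operands are not emitted
          MCI.addOperand(llvm::MCOperand::createReg(MO.getReg()));
        } else if (MO.isImm()) {
          MCI.addOperand(llvm::MCOperand::createImm(MO.getImm()));
        }
      }
    }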
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index cb18df6ed198..76723586c66e 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -27,6 +27,7 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
unsigned SRetReturnReg;
+ unsigned StackAlignBaseReg;
std::vector<MachineInstr*> AllocaAdjustInsts;
int VarArgsFrameIndex;
bool HasClobberLR;
@@ -35,10 +36,11 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
public:
- HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
- HasEHReturn(false) {}
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0),
+ HasClobberLR(0), HasEHReturn(false) {}
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ StackAlignBaseReg(0),
HasClobberLR(0),
HasEHReturn(false) {}
@@ -74,6 +76,9 @@ public:
bool hasEHReturn() const { return HasEHReturn; };
void setHasEHReturn(bool H = true) { HasEHReturn = H; };
+
+ void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
+ unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
};
} // End llvm namespace
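The new StackAlignBaseReg field follows the usual MachineFunctionInfo pattern: one pass records a value, later passes query it through MF.getInfo<>(). A hedged sketch of that handshake (that the producer is stack realignment in frame lowering is an assumption):

    #include "HexagonMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Record the virtual register assumed to hold the realigned stack base...
    void recordAlignBase(llvm::MachineFunction &MF, unsigned BaseVReg) {
      MF.getInfo<llvm::HexagonMachineFunctionInfo>()
          ->setStackAlignBaseVReg(BaseVReg);
    }

    // ...and read it back, e.g. when rewriting frame-index operands.
    unsigned alignBaseOf(llvm::MachineFunction &MF) {
      return MF.getInfo<llvm::HexagonMachineFunctionInfo>()
          ->getStackAlignBaseVReg();
    }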
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 97c626fdf7af..35f732cd6207 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -205,20 +205,17 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
delete Top.HazardRec;
delete Bot.HazardRec;
- Top.HazardRec =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
- Bot.HazardRec =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
delete Top.ResourceModel;
delete Bot.ResourceModel;
- Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
- Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
+ Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 1e023c32bb8c..60343442e327 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -54,11 +54,9 @@ class VLIWResourceModel {
unsigned TotalPackets;
public:
-VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
- SchedModel(SM), TotalPackets(0) {
- ResourcesModel =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState(
- *TM.getSubtargetImpl());
+ VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM)
+ : SchedModel(SM), TotalPackets(0) {
+ ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI);
// This hard requirement could be relaxed,
// but for now do not let it proceed.
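Both scheduler files make the same mechanical change: hazard recognizers and the resource model are now built from the per-function TargetSubtargetInfo instead of reaching through TargetMachine::getSubtargetImpl(). The access pattern, as a minimal sketch:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetSubtargetInfo.h"

    const llvm::TargetInstrInfo *instrInfoFor(const llvm::MachineFunction &MF) {
      // Per-function subtarget; replaces TM.getSubtargetImpl()->getInstrInfo().
      const llvm::TargetSubtargetInfo &STI = MF.getSubtarget();
      return STI.getInstrInfo();
    }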
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 7edba92e7e0d..81af4db912cc 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::TFR_condset_ii ||
- MII->getOpcode() == Hexagon::TFR_condset_ri ||
- MII->getOpcode() == Hexagon::TFR_condset_ir ||
- MII->getOpcode() == Hexagon::LDriw_pred ||
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
MII->getOpcode() == Hexagon::STriw_pred)
return false;
}
@@ -299,48 +297,48 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
switch (MI->getOpcode()) {
case Hexagon::C2_cmpeq:
- return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
- : Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeq_t_jumpnv_t
+ : Hexagon::J4_cmpeq_t_jumpnv_nt;
case Hexagon::C2_cmpeqi: {
if (reg >= 0)
- return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
- : Hexagon::CMPEQri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t
+ : Hexagon::J4_cmpeqi_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4
- : Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t
+ : Hexagon::J4_cmpeqn1_t_jumpnv_nt;
}
case Hexagon::C2_cmpgt: {
if (secondRegNewified)
- return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
- : Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmplt_t_jumpnv_t
+ : Hexagon::J4_cmplt_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4
- : Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgt_t_jumpnv_t
+ : Hexagon::J4_cmpgt_t_jumpnv_nt;
}
case Hexagon::C2_cmpgti: {
if (reg >= 0)
- return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
- : Hexagon::CMPGTri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgti_t_jumpnv_t
+ : Hexagon::J4_cmpgti_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4
- : Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t
+ : Hexagon::J4_cmpgtn1_t_jumpnv_nt;
}
case Hexagon::C2_cmpgtu: {
if (secondRegNewified)
- return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
- : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpltu_t_jumpnv_t
+ : Hexagon::J4_cmpltu_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4
- : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t
+ : Hexagon::J4_cmpgtu_t_jumpnv_nt;
}
case Hexagon::C2_cmpgtui:
- return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
- : Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t
+ : Hexagon::J4_cmpgtui_t_jumpnv_nt;
default:
llvm_unreachable("Could not find matching New Value Jump instruction.");
@@ -355,19 +353,15 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: "
<< MF.getName() << "\n");
-#if 0
- // for now disable this, if we move NewValueJump before register
- // allocation we need this information.
- LiveVariables &LVs = getAnalysis<LiveVariables>();
-#endif
+ // If we move NewValueJump before register allocation, we'll need live
+ // variable analysis here too.
QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
QRI = static_cast<const HexagonRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- if (!QRI->Subtarget.hasV4TOps() ||
- DisableNewValueJumps) {
+ if (DisableNewValueJumps) {
return false;
}
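With the V4 check gone, the DisableNewValueJumps cl::opt gate is the only remaining way to turn the pass off. A hedged sketch of such a flag declaration (the exact in-tree flag string and description are assumptions; the real option lives at the top of this file):

    #include "llvm/Support/CommandLine.h"

    // Hypothetical spelling shown for illustration only.
    static llvm::cl::opt<bool> DisableNewValueJumps(
        "disable-nvjump", llvm::cl::Hidden,
        llvm::cl::desc("Disable New Value Jumps"));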
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 5a6de0ae2746..b7f364ef0751 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -66,162 +66,131 @@ def nOneImm : Operand<i32>;
// Immediate predicates
//
def s32ImmPred : PatLeaf<(i32 imm), [{
- // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<29,3>(v);
+}]>;
+
+def s22_10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<22,10>(v);
+}]>;
+
+def s8_24ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
+ return isShiftedInt<8,24>(v);
}]>;
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
+def s16_16ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
+ return isShiftedInt<16,16>(v);
}]>;
def s26_6ImmPred : PatLeaf<(i32 imm), [{
- // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<26,6>(v);
}]>;
-
def s16ImmPred : PatLeaf<(i32 imm), [{
- // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<16>(v);
}]>;
-
def s13ImmPred : PatLeaf<(i32 imm), [{
- // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<13>(v);
}]>;
-
def s12ImmPred : PatLeaf<(i32 imm), [{
- // s12ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<12>(v);
}]>;
def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
}]>;
-
def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,1>(v);
}]>;
-
def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
- // sign extended field and is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,2>(v);
}]>;
-
def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
- // sign extended field and is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,3>(v);
}]>;
-
def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<10>(v);
}]>;
-
def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v);
}]>;
def m9ImmPred : PatLeaf<(i32 imm), [{
- // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
- // field. The range of m9 is -255 to 255.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v) && (v != -256);
}]>;
def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<6>(v);
}]>;
-
def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<4>(v);
}]>;
-
def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,1>(v);
}]>;
-
def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,2>(v);
}]>;
-
def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,3>(v);
}]>;
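With the per-predicate comments stripped, the semantics live entirely in the MathExtras helpers: isInt<N>(v) checks a signed N-bit range, and isShiftedInt<N,S>(v) additionally requires v to be a multiple of 2^S whose quotient fits in N signed bits (isUInt/isShiftedUInt are the unsigned analogues below). A worked example for the s11_2 case:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    // s11_2ImmPred accepts 4-byte-aligned offsets in [-4096, 4092].
    void s11_2_demo() {
      assert(llvm::isShiftedInt<11, 2>(4092));   // 1023 * 4: max value
      assert(!llvm::isShiftedInt<11, 2>(4094));  // not a multiple of 4
      assert(!llvm::isShiftedInt<11, 2>(4096));  // 1024 overflows s11
      assert(llvm::isShiftedInt<11, 2>(-4096));  // -1024 * 4: min value
    }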
@@ -233,50 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{
}]>;
def u32ImmPred : PatLeaf<(i32 imm), [{
- // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
+def u32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<31,1>(v);
+}]>;
+
+def u30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<30,2>(v);
+}]>;
+
+def u29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<29,3>(v);
+}]>;
+
def u26_6ImmPred : PatLeaf<(i32 imm), [{
- // u26_6ImmPred - True if the immediate fits in a 32-bit field and
- // is a multiple of 64.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<26,6>(v);
}]>;
def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
- // extended s8 field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<16,8>(v);
}]>;
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
def u11_3ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 14-bit unsigned field, and the lowest
- // three bits are 0.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<11,3>(v);
}]>;
def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<8>(v);
}]>;
@@ -288,75 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{
}]>;
def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<7>(v);
}]>;
-
def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field that is 1 bit alinged - multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,1>(v);
}]>;
def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field that is 2 bits alinged - multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,2>(v);
}]>;
def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field that is 3 bits alinged - multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,3>(v);
}]>;
def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<5>(v);
}]>;
+def u4ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<4>(v);
+}]>;
def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<3>(v);
}]>;
-
def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<2>(v);
}]>;
-
def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<1>(v);
}]>;
@@ -499,356 +460,20 @@ let PrintMethod = "printExtOperand" in {
def u6_3Ext : Operand<i32>;
}
-let PrintMethod = "printImmOperand" in
-def u0AlwaysExt : Operand<i32>;
-
-// Predicates for constant extendable operands
-def s16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 16-bit sign extended field.
- return isInt<16>(v);
- else {
- if (isInt<16>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 10-bit sign extended field.
- return isInt<10>(v);
- else {
- if (isInt<10>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit sign extended field.
- return isInt<9>(v);
- else {
- if (isInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit sign extended field.
- return isInt<8>(v);
- else {
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s8_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate fits in a 8-bit sign extended field.
- return isInt<8>(v);
- else {
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
- }
-}]>;
-
-def s6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s6_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate fits in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
- }
-}]>;
-
-def s6_10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 10-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<10>(v);
- }
-}]>;
-
-def s11_0ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 11-bit sign extended field.
- return isShiftedInt<11,0>(v);
- else {
- if (isInt<11>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s11_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 12-bit sign extended field and
- // is 2 byte aligned.
- return isShiftedInt<11,1>(v);
- else {
- if (isInt<12>(v))
- return isShiftedInt<11,1>(v);
-
- // Return true if extending this immediate is profitable and the low 1 bit
- // is zero (2-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
- }
-}]>;
-
-def s11_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 13-bit sign extended field and
- // is 4-byte aligned.
- return isShiftedInt<11,2>(v);
- else {
- if (isInt<13>(v))
- return isShiftedInt<11,2>(v);
-
- // Return true if extending this immediate is profitable and the low 2-bits
- // are zero (4-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
- }
-}]>;
-
-def s11_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 14-bit sign extended field and
- // is 8-byte aligned.
- return isShiftedInt<11,3>(v);
- else {
- if (isInt<14>(v))
- return isShiftedInt<11,3>(v);
-
- // Return true if extending this immediate is profitable and the low 3-bits
- // are zero (8-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
- }
-}]>;
-
-def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
- // Predicate for an unsigned 32-bit value that always needs to be extended.
- if (Subtarget.hasV4TOps()) {
- if (isConstExtProfitable(Node)) {
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
- }
- }
- return false;
-}]>;
-
-def u6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit unsigned field.
- return isUInt<6>(v);
- else {
- if (isUInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u7ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 7-bit unsigned field.
- return isUInt<7>(v);
- else {
- if (isUInt<7>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit unsigned field.
- return isUInt<8>(v);
- else {
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit unsigned field.
- return isUInt<9>(v);
- else {
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u6_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 7-bit unsigned field and
- // is 2-byte aligned.
- return isShiftedUInt<6,1>(v);
- else {
- if (isUInt<7>(v))
- return isShiftedUInt<6,1>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
- }
-}]>;
-
-def u6_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit unsigned field and
- // is 4-byte aligned.
- return isShiftedUInt<6,2>(v);
- else {
- if (isUInt<8>(v))
- return isShiftedUInt<6,2>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
- }
-}]>;
-
-def u6_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit unsigned field and
- // is 8-byte aligned.
- return isShiftedUInt<6,3>(v);
- else {
- if (isUInt<9>(v))
- return isShiftedUInt<6,3>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
- }
-}]>;
-
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
// in the patterns.
def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
-// Addressing modes.
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+// These complex patterns are not strictly necessary, since global address
+// folding will happen during DAG combining. To distinguish between GA and
+// GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
+def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
+def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
let PrintMethod = "printGlobalOperand" in {
def globaladdress : Operand<i32>;
def globaladdressExt : Operand<i32>;
@@ -858,7 +483,9 @@ let PrintMethod = "printJumpTable" in
def jumptablebase : Operand<i32>;
def brtarget : Operand<OtherVT>;
-def brtargetExt : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT> {
+ let PrintMethod = "printExtBrtarget";
+}
def calltarget : Operand<i32>;
def bblabel : Operand<i32>;
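AddrFI, AddrGA and AddrGP are 1-in/1-out ComplexPatterns, so each names a selector hook on the Hexagon DAG-to-DAG pass. A hedged sketch of the shape such a hook takes; the signature and body are illustrative assumptions, not the in-tree implementation:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Sketch: accept a bare global address and hand it back as the single
    // result operand; anything else is left for ordinary selection.
    bool SelectAddrGA(llvm::SDValue N, llvm::SDValue &R) {
      if (N.getOpcode() != llvm::ISD::TargetGlobalAddress)
        return false;
      R = N;
      return true;
    }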
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index e9b2ef6b3911..503bfdb6b3eb 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -112,7 +112,7 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
- QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
MRI = &MF.getRegInfo();
DenseMap<unsigned, unsigned> PeepholeMap;
@@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (Op) {
case Hexagon::C2_mux:
case Hexagon::C2_muxii:
- case Hexagon::TFR_condset_ii:
NewOp = Op;
break;
- case Hexagon::TFR_condset_ri:
- NewOp = Hexagon::TFR_condset_ir;
- break;
- case Hexagon::TFR_condset_ir:
- NewOp = Hexagon::TFR_condset_ri;
- break;
case Hexagon::C2_muxri:
NewOp = Hexagon::C2_muxir;
break;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index a64c9df9a047..8f255a08f534 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -30,36 +30,58 @@
#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+HexagonRegisterInfo::HexagonRegisterInfo()
+ : HexagonGenRegisterInfo(Hexagon::R31) {}
-HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
- : HexagonGenRegisterInfo(Hexagon::R31),
- Subtarget(st) {
+
+bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const {
+ return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 ||
+ R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1;
+}
+
+bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const {
+ return Hexagon::R16 <= Reg && Reg <= Hexagon::R27;
}
+
const MCPhysReg *
-HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const MCPhysReg CalleeSavedRegsV2[] = {
- Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CallerSavedRegsV4[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, 0
};
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ switch (HST.getHexagonArchVersion()) {
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ return CallerSavedRegsV4;
+ }
+ llvm_unreachable(
+ "Caller saved registers requested for unknown architecture version");
+}
+
+
+const MCPhysReg *
+HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
static const MCPhysReg CalleeSavedRegsV3[] = {
Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
};
- switch(Subtarget.getHexagonArchVersion()) {
- case HexagonSubtarget::V1:
- break;
- case HexagonSubtarget::V2:
- return CalleeSavedRegsV2;
- case HexagonSubtarget::V3:
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegsV3;
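Both register lists are zero-terminated MCPhysReg arrays, which is how consumers of getCalleeSavedRegs and getCallerSavedRegs walk them. A small sketch:

    #include "llvm/MC/MCRegisterInfo.h"  // MCPhysReg

    // Walk a zero-terminated register list as returned above: 16 caller-saved
    // GPRs (R0-R15) or 12 callee-saved GPRs (R16-R27) on V4/V5.
    unsigned countRegs(const llvm::MCPhysReg *List) {
      unsigned N = 0;
      while (List[N] != 0)
        ++N;
      return N;
    }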
@@ -86,212 +108,153 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
}
-const TargetRegisterClass* const*
-HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- };
-
- switch(Subtarget.getHexagonArchVersion()) {
- case HexagonSubtarget::V1:
- break;
- case HexagonSubtarget::V2:
- return CalleeSavedRegClassesV2;
- case HexagonSubtarget::V3:
- case HexagonSubtarget::V4:
- case HexagonSubtarget::V5:
- return CalleeSavedRegClassesV3;
- }
- llvm_unreachable("Callee saved register classes requested for unknown "
- "architecture version");
-}
-
void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
+ int SPAdj, unsigned FIOp,
RegScavenger *RS) const {
//
// Hexagon_TODO: Do we need to enforce this for Hexagon?
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- // Addressable stack objects are accessed using neg. offsets from %fp.
- MachineFunction &MF = *MI.getParent()->getParent();
- const HexagonInstrInfo &TII =
- *static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineBasicBlock &MB = *MI.getParent();
+ MachineFunction &MF = *MB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HFI = *HST.getFrameLowering();
- unsigned FrameReg = getFrameRegister(MF);
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (!TFI->hasFP(MF)) {
+ int FI = MI.getOperand(FIOp).getIndex();
+ int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm();
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool HasAlign = needsStackRealignment(MF);
+
+ // XXX: Fixed objects cannot be accessed through SP if there are aligned
+ // objects in the local frame, or if there are dynamically allocated
+ // objects. In such cases, the frame pointer must be available.
+ if (!HFI.hasFP(MF)) {
+ assert(!HasAlloca && !HasAlign && "This function must have frame pointer");
// We will not reserve space on the stack for the lr and fp registers.
- Offset -= 2 * Hexagon_WordSize;
+ Offset -= 8;
}
- const unsigned FrameSize = MFI.getStackSize();
+ unsigned SP = getStackRegister(), FP = getFrameRegister();
+ unsigned AP = 0;
+ if (MachineInstr *AI = HFI.getAlignaInstr(MF))
+ AP = AI->getOperand(0).getReg();
+ unsigned FrameSize = MFI.getStackSize();
+
+ // Special handling of dbg_value instructions and INLINEASM.
+ if (MI.isDebugValue() || MI.isInlineAsm()) {
+ MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/);
+ MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize);
+ return;
+ }
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
- !TII.isSpillPredRegOp(&MI)) {
- // Replace frame index with a stack pointer reference.
- MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
- false, true);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
+ bool UseFP = false, UseAP = false; // Default: use SP.
+ if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+ UseFP = HasAlloca || HasAlign;
} else {
- // Replace frame index with a frame pointer reference.
- if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
-
- // If the offset overflows, then correct it.
- //
- // For loads, we do not need a reserved register
- // r0 = memw(r30 + #10000) to:
- //
- // r0 = add(r30, #10000)
- // r0 = memw(r0)
- if ( (MI.getOpcode() == Hexagon::L2_loadri_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrd_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrh_io) ||
- (MI.getOpcode() == Hexagon::L2_loadruh_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrb_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrub_io) ||
- (MI.getOpcode() == Hexagon::LDriw_f) ||
- (MI.getOpcode() == Hexagon::LDrid_f)) {
- unsigned dstReg = (MI.getOpcode() == Hexagon::L2_loadrd_io) ?
- getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
- MI.getOperand(0).getReg();
-
- // Check if offset can fit in addi.
- if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- dstReg).addReg(FrameReg).addReg(dstReg);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- dstReg).addReg(FrameReg).addImm(Offset);
- }
-
- MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else if ((MI.getOpcode() == Hexagon::S2_storeri_io) ||
- (MI.getOpcode() == Hexagon::S2_storerd_io) ||
- (MI.getOpcode() == Hexagon::S2_storerh_io) ||
- (MI.getOpcode() == Hexagon::S2_storerb_io) ||
- (MI.getOpcode() == Hexagon::STrid_f) ||
- (MI.getOpcode() == Hexagon::STriw_f)) {
- // For stores, we need a reserved register. Change
- // memw(r30 + #10000) = r0 to:
- //
- // rs = add(r30, #10000);
- // memw(rs) = r0
- unsigned resReg = HEXAGON_RESERVED_REG_1;
-
- // Check if offset can fit in addi.
- if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- resReg).addReg(FrameReg).addReg(resReg);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- resReg).addReg(FrameReg).addImm(Offset);
- }
- MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else if (TII.isMemOp(&MI)) {
- // use the constant extender if the instruction provides it
- // and we are V4TOps.
- if (Subtarget.hasV4TOps()) {
- if (TII.isConstExtended(&MI)) {
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
- TII.immediateExtend(&MI);
- } else {
- llvm_unreachable("Need to implement for memops");
- }
- } else {
- // Only V3 and older instructions here.
- unsigned ResReg = HEXAGON_RESERVED_REG_1;
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
- false, false, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
- } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg).
- addReg(ResReg);
- MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
- addImm(Offset);
- MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- }
- }
- } else {
- unsigned dstReg = MI.getOperand(0).getReg();
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- dstReg).addReg(FrameReg).addReg(dstReg);
- // Can we delete MI??? r2 = add (r2, #0).
- MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- }
- } else {
- // If the offset is small enough to fit in the immediate field, directly
- // encode it.
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ if (HasAlloca) {
+ if (HasAlign)
+ UseAP = true;
+ else
+ UseFP = true;
}
}
+ unsigned Opc = MI.getOpcode();
+ bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset);
+ bool ValidFP = HII.isValidOffset(Opc, Offset);
+
+ // Calculate the actual offset in the instruction.
+ int64_t RealOffset = Offset;
+ if (!UseFP && !UseAP)
+ RealOffset = FrameSize+Offset;
+
+ switch (Opc) {
+ case Hexagon::TFR_FIA:
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
+ MI.RemoveOperand(FIOp+1);
+ return;
+ case Hexagon::TFR_FI:
+ // Set up the instruction for updating below.
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ break;
+ }
+
+ unsigned BP = 0;
+ bool Valid = false;
+ if (UseFP) {
+ BP = FP;
+ Valid = ValidFP;
+ } else if (UseAP) {
+ BP = AP;
+ Valid = ValidFP;
+ } else {
+ BP = SP;
+ Valid = ValidSP;
+ }
+
+ if (Valid) {
+ MI.getOperand(FIOp).ChangeToRegister(BP, false);
+ MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
+ return;
+ }
+
+#ifndef NDEBUG
+ const Function *F = MF.getFunction();
+ dbgs() << "In function ";
+ if (F) dbgs() << F->getName();
+ else dbgs() << "<?>";
+ dbgs() << ", BB#" << MB.getNumber() << "\n" << MI;
+#endif
+ llvm_unreachable("Unhandled instruction");
}
+
unsigned HexagonRegisterInfo::getRARegister() const {
return Hexagon::R31;
}
+
unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (TFI->hasFP(MF)) {
+ if (TFI->hasFP(MF))
return Hexagon::R30;
- }
-
return Hexagon::R29;
}
+
unsigned HexagonRegisterInfo::getFrameRegister() const {
return Hexagon::R30;
}
+
unsigned HexagonRegisterInfo::getStackRegister() const {
return Hexagon::R29;
}
+
+bool
+HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return MF.getSubtarget().getFrameLowering()->hasFP(MF);
+}
+
+
+bool
+HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getMaxAlignment() > 8;
+}
+
+
+unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
+ return Hexagon::R6;
+}
+
+
#define GET_REGINFO_TARGET_DESC
#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index a83b5026467a..7edefee93993 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -37,39 +37,37 @@
#define HEXAGON_RESERVED_REG_2 Hexagon::R11
namespace llvm {
-
-class HexagonSubtarget;
-class HexagonInstrInfo;
-class Type;
-
-struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
- HexagonSubtarget &Subtarget;
-
- HexagonRegisterInfo(HexagonSubtarget &st);
+class HexagonRegisterInfo : public HexagonGenRegisterInfo {
+public:
+ HexagonRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
+ const override;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = nullptr) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
- /// determineFrameLayout - Determine the size of the frame and maximum call
- /// frame size.
- void determineFrameLayout(MachineFunction &MF) const;
-
- /// requiresRegisterScavenging - returns true since we may need scavenging for
- /// a temporary register when generating hardware loop instructions.
+ /// Returns true since we may need scavenging for a temporary register
+ /// when generating hardware loop instructions.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
+ /// Returns true. Spill code for predicate registers might need an extra
+ /// register.
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool needsStackRealignment(const MachineFunction &MF) const override;
+
+ /// Returns true if the frame pointer is valid.
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
@@ -79,6 +77,13 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
+
+ const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const;
+
+ unsigned getFirstCallerSavedNonParamReg() const;
+
+ bool isEHReturnCalleeSaveReg(unsigned Reg) const;
+ bool isCalleeSaveReg(unsigned Reg) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index decd94722da1..edf1c251ac77 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -163,18 +163,20 @@ let Namespace = "Hexagon" in {
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
(add (sequence "R%u", 0, 9),
(sequence "R%u", 12, 28),
R10, R11, R29, R30, R31)> {
}
-def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
-def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+def PredRegs : RegisterClass<"Hexagon",
+ [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
+ (add (sequence "P%u", 0, 3))>
{
let Size = 32;
}
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 8fdd493a75dc..4efb5f75af62 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -48,12 +48,9 @@ using namespace llvm;
namespace {
class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
- const HexagonTargetMachine &QTM;
-
public:
static char ID;
- HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM)
- : MachineFunctionPass(ID), QTM(TM) {}
+ HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {}
const char *getPassName() const override {
return "Hexagon Split Const32s and Const64s";
@@ -68,13 +65,13 @@ char HexagonSplitConst32AndConst64::ID = 0;
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
const HexagonTargetObjectFile &TLOF =
- (const HexagonTargetObjectFile &)QTM.getSubtargetImpl()
- ->getTargetLowering()
- ->getObjFileLowering();
+ *static_cast<const HexagonTargetObjectFile *>(
+ Fn.getTarget().getObjFileLowering());
if (TLOF.IsSmallDataEnabled())
return true;
- const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -86,7 +83,8 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
while (MII != MIE) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::CONST32_set) {
+ if (Opc == Hexagon::CONST32_Int_Real &&
+ MI->getOperand(1).isBlockAddress()) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
@@ -99,69 +97,53 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_set_jt) {
- int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST32_Label) {
+ else if (Opc == Hexagon::CONST32_Int_Real ||
+ Opc == Hexagon::CONST32_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_label), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_label), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST32_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Int_Real) {
+ else if (Opc == Hexagon::CONST64_Int_Real ||
+ Opc == Hexagon::CONST64_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST64_Int_Real) {
- int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_loreg);
- unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_hireg);
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST64_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
- // Lower Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
- // Lower Registers Higher Half
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
- // Higher Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
- // Higher Registers Higher Half.
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+ TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
MII = MBB->erase (MI);
continue;
- }
+ }
++MII;
}
}
@@ -176,6 +158,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
//===----------------------------------------------------------------------===//
FunctionPass *
-llvm::createHexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) {
- return new HexagonSplitConst32AndConst64(TM);
+llvm::createHexagonSplitConst32AndConst64() {
+ return new HexagonSplitConst32AndConst64();
}
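The CONST64 expansion above is a plain 64-bit split into two 32-bit transfers. A hedged worked example (the immediate is made up; the assembly shape is the usual Hexagon rendering, not taken from the patch):

// ImmValue = 0x0000000100000002
// LowWord  = ImmValue & 0xFFFFFFFF          -> 2
// HighWord = (ImmValue >> 32) & 0xFFFFFFFF  -> 1
//   d0 = CONST64(#0x100000002)   becomes   r0 = #2; r1 = #1   // two A2_tfrsi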
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
deleted file mode 100644
index a304e655f0b1..000000000000
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-//===----------------------------------------------------------------------===//
-// This pass tries to provide opportunities for better optimization of muxes.
-// The default code generated for something like: flag = (a == b) ? 1 : 3;
-// would be:
-//
-// {p0 = cmp.eq(r0,r1)}
-// {r3 = mux(p0,#1,#3)}
-//
-// This requires two packets. If we use .new predicated immediate transfers,
-// then we can do this in a single packet, e.g.:
-//
-// {p0 = cmp.eq(r0,r1)
-// if (p0.new) r3 = #1
-// if (!p0.new) r3 = #3}
-//
-// Note that the conditional assignments are not generated in .new form here.
-// We assume optimistically that they will be formed later.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "xfer"
-
-namespace llvm {
- void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
-}
-
-
-namespace {
-
-class HexagonSplitTFRCondSets : public MachineFunctionPass {
- const HexagonTargetMachine &QTM;
- const HexagonSubtarget &QST;
-
- public:
- static char ID;
- HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
- initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
- }
-
- const char *getPassName() const override {
- return "Hexagon Split TFRCondSets";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-
-char HexagonSplitTFRCondSets::ID = 0;
-
-
-bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
-
- const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
-
- // Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
- // Traverse the basic block.
- for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
- ++MII) {
- MachineInstr *MI = MII;
- int Opc1, Opc2;
- switch(MI->getOpcode()) {
- case Hexagon::TFR_condset_rr_f:
- case Hexagon::TFR_condset_rr64_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
- Opc1 = Hexagon::A2_tfrt;
- Opc2 = Hexagon::A2_tfrf;
- }
- else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
- Opc1 = Hexagon::A2_tfrpt;
- Opc2 = Hexagon::A2_tfrpf;
- }
-
- // Minor optimization: do not emit the predicated copy if the source
- // and the destination is the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ri:
- case Hexagon::TFR_condset_ri_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
-
- // Do not emit the predicated copy if the source and the destination
- // is the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrt), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(3).getImm());
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addFPImm(MI->getOperand(3).getFPImm());
- }
-
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ir:
- case Hexagon::TFR_condset_ir_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(2).getImm());
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt_f), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addFPImm(MI->getOperand(2).getFPImm());
- }
-
- // Do not emit the predicated copy if the source and
- // the destination is the same register.
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrf), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ii:
- case Hexagon::TFR_condset_ii_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(1).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_ii ) {
- int Immed1 = MI->getOperand(2).getImm();
- int Immed2 = MI->getOperand(3).getImm();
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit),
- DestReg).addReg(SrcReg1).addImm(Immed1);
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif),
- DestReg).addReg(SrcReg1).addImm(Immed2);
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt_f), DestReg).
- addReg(SrcReg1).
- addFPImm(MI->getOperand(2).getFPImm());
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
- addReg(SrcReg1).
- addFPImm(MI->getOperand(3).getFPImm());
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- }
- }
- }
- return true;
-}
-
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-static void initializePassOnce(PassRegistry &Registry) {
- const char *Name = "Hexagon Split TFRCondSets";
- PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
- &HexagonSplitTFRCondSets::ID, nullptr, false,
- false);
- Registry.registerPass(*PI, true);
-}
-
-void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializePassOnce)
-}
-
-FunctionPass*
-llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) {
- return new HexagonSplitTFRCondSets(TM);
-}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 657893f32fee..d61cc5418a4a 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,18 +48,17 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
if (CPUString.empty())
CPUString = "hexagonv4";
- if (CPUString == "hexagonv2") {
- HexagonArchVersion = V2;
- } else if (CPUString == "hexagonv3") {
- EnableV3 = true;
- HexagonArchVersion = V3;
- } else if (CPUString == "hexagonv4") {
+ if (CPUString == "hexagonv4") {
HexagonArchVersion = V4;
} else if (CPUString == "hexagonv5") {
HexagonArchVersion = V5;
@@ -73,10 +72,9 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM)
- : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- TSInfo(DL), FrameLowering() {
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TSInfo(*TM.getDataLayout()), FrameLowering() {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -97,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+ if (DisableHexagonMISched.getNumOccurrences())
+ return !DisableHexagonMISched;
+ return true;
+}
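enableMachineScheduler above defaults to the MI scheduler and only yields to the flag when it was explicitly passed (getNumOccurrences() != 0). A hedged usage sketch; only the flag name comes from the cl::opt above, the llc invocation shape is an assumption:

//   llc -march=hexagon foo.ll                           // MI scheduler on (default)
//   llc -march=hexagon -disable-hexagon-misched foo.ll  // MI scheduler off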
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 34e327f7c3a1..780567bcd36b 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -39,13 +39,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
public:
enum HexagonArchEnum {
- V1, V2, V3, V4, V5
+ V4, V5
};
HexagonArchEnum HexagonArchVersion;
private:
std::string CPUString;
- const DataLayout DL; // Calculates type size & alignment.
HexagonInstrInfo InstrInfo;
HexagonTargetLowering TLInfo;
HexagonSelectionDAGInfo TSInfo;
@@ -74,7 +73,6 @@ public:
const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const DataLayout *getDataLayout() const override { return &DL; }
HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
@@ -83,18 +81,16 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool hasV2TOps () const { return HexagonArchVersion >= V2; }
- bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
- bool hasV3TOps () const { return HexagonArchVersion >= V3; }
- bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
- bool hasV4TOps () const { return HexagonArchVersion >= V4; }
- bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
- bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
- bool hasV5TOps () const { return HexagonArchVersion >= V5; }
- bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
- bool modeIEEERndNear () const { return ModeIEEERndNear; }
-
- bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ bool useMemOps() const { return UseMemOps; }
+ bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
+ bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+ bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool enableMachineScheduler() const override;
+ // Always use the TargetLowering default scheduler.
+ // FIXME: This will use the vliw scheduler which is probably just hurting
+ // compiler time and will be removed eventually anyway.
+ bool enableMachineSchedDefaultSched() const override { return false; }
+
const std::string &getCPUString () const { return CPUString; }
// Threshold for small data section
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 52aff2787bac..06798665cb05 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -17,8 +17,8 @@
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -27,15 +27,15 @@
using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
- cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Early expansion of MUX"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -59,6 +59,10 @@ static MachineSchedRegistry
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
+namespace llvm {
+ FunctionPass *createHexagonExpandCondsets();
+}
+
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
@@ -69,7 +73,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<HexagonTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -83,14 +88,13 @@ class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
- // HexagonSubtarget::enableMachineScheduler() { return true; }.
- // That will bypass the SelectionDAG VLIW scheduler, which is probably just
- // hurting compile time and will be removed eventually anyway.
- if (DisableHexagonMISched)
- disablePass(&MachineSchedulerID);
- else
- enablePass(&MachineSchedulerID);
+ bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ if (EnableExpandCondsets) {
+ Pass *Exp = createHexagonExpandCondsets();
+ insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+ }
+ }
}
HexagonTargetMachine &getHexagonTargetMachine() const {
@@ -138,33 +142,26 @@ void HexagonPassConfig::addPreRegAlloc() {
}
void HexagonPassConfig::addPostRegAlloc() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
if (getOptLevel() != CodeGenOpt::None)
if (!DisableHexagonCFGOpt)
- addPass(createHexagonCFGOptimizer(TM), false);
+ addPass(createHexagonCFGOptimizer(), false);
}
void HexagonPassConfig::addPreSched2() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
-
addPass(createHexagonCopyToCombine(), false);
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID, false);
- addPass(createHexagonSplitConst32AndConst64(TM));
+ addPass(createHexagonSplitConst32AndConst64());
}
void HexagonPassConfig::addPreEmitPass() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
addPass(createHexagonNewValueJump(), false);
// Expand Spill code for predicate registers.
- addPass(createHexagonExpandPredSpillCode(TM), false);
-
- // Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(TM), false);
+ addPass(createHexagonExpandPredSpillCode(), false);
// Create Packets.
if (!NoOpt) {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 4a9f44732a6b..5774f7e195b0 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -32,8 +32,7 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
-
- const HexagonSubtarget *getSubtargetImpl() const override {
+ const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4ab5e2b5c42..4ea0e0d11998 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -33,14 +33,10 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
- SmallDataSection =
- getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- SmallBSSSection =
- getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getBSS());
+ SmallDataSection = getContext().getELFSection(
+ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
// sdata/sbss support taken largely from the MIPS Backend.
@@ -79,14 +75,13 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
}
return false;
}
-const MCSection *
+MCSection *
HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index c97420427240..da0eeeb3fd28 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -16,8 +16,9 @@
namespace llvm {
class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
- const MCSectionELF *SmallDataSection;
- const MCSectionELF *SmallBSSSection;
+ MCSectionELF *SmallDataSection;
+ MCSectionELF *SmallBSSSection;
+
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
@@ -30,9 +31,9 @@ namespace llvm {
const TargetMachine &TM) const;
bool IsSmallDataEnabled () const;
- const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // namespace llvm
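IsGlobalInSmallSection sizes the global's value type and compares it against the small-data threshold. A hedged illustration, assuming the conventional 8-byte default threshold (the value is not stated in these hunks):

//   int g;          // 4 bytes  -> .sdata, GP-relative access
//   long long a[8]; // 64 bytes -> .data, ordinary addressing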
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c9605278e045..0cc59bcc7671 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -264,8 +264,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
static bool IsIndirectCall(MachineInstr* MI) {
- return ((MI->getOpcode() == Hexagon::J2_callr) ||
- (MI->getOpcode() == Hexagon::CALLRv3));
+ return MI->getOpcode() == Hexagon::J2_callr;
}
// Reserve resources for constant extender. Trigger an assertion if
@@ -371,7 +370,7 @@ static bool IsDirectJump(MachineInstr* MI) {
static bool IsSchedBarrier(MachineInstr* MI) {
switch (MI->getOpcode()) {
- case Hexagon::BARRIER:
+ case Hexagon::Y2_barrier:
return true;
}
return false;
@@ -390,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) {
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ for (const MCPhysReg *CSR =
+ TRI->getCalleeSavedRegs(MI->getParent()->getParent());
+ *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
@@ -402,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
// or new-value store.
bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if ( isCondInst(MI) || QII->mayBeNewStore(MI))
- return true;
- else
- return false;
+ return isCondInst(MI) || QII->mayBeNewStore(MI);
}
bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
@@ -721,10 +719,7 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
MachineBasicBlock::iterator &MII) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
- if (!QRI->Subtarget.hasV4TOps() ||
- !QII->mayBeNewStore(MI))
+ if (!QII->mayBeNewStore(MI))
return false;
MachineInstr *PacketMI = PacketSU->getInstr();
@@ -955,6 +950,9 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
if (MI->isDebugValue())
return true;
+ if (MI->isCFIInstruction())
+ return false;
+
// We must print out inline assembly
if (MI->isInlineAsm())
return false;
@@ -972,11 +970,10 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
// isSoloInstruction: - Returns true for instructions that must be
// scheduled in their own packet.
bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
-
- if (MI->isInlineAsm())
+ if (MI->isEHLabel() || MI->isCFIInstruction())
return true;
- if (MI->isEHLabel())
+ if (MI->isInlineAsm())
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
@@ -1055,84 +1052,82 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// first store is not in SLOT0. New value store, new value jump,
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
- if (QRI->Subtarget.hasV4TOps()) {
- if (MCIDI.mayStore() && MCIDJ.mayStore() &&
- (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
- Dependence = true;
- return false;
- }
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
- if ((QII->isMemOp(J) && MCIDI.mayStore())
- || (MCIDJ.mayStore() && QII->isMemOp(I))
- || (QII->isMemOp(J) && QII->isMemOp(I))) {
- Dependence = true;
- return false;
- }
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
- //if dealloc_return
- if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
- Dependence = true;
- return false;
- }
+ //if dealloc_return
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
+ Dependence = true;
+ return false;
+ }
- // If an instruction feeds new value jump, glue it.
- MachineBasicBlock::iterator NextMII = I;
- ++NextMII;
- if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
- MachineInstr *NextMI = NextMII;
+ // If an instruction feeds new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+ MachineInstr *NextMI = NextMII;
- bool secondRegMatch = false;
- bool maintainNewValueJump = false;
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
- if (NextMI->getOperand(1).isReg() &&
- I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
- secondRegMatch = true;
- maintainNewValueJump = true;
- }
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
- if (!secondRegMatch &&
- I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
- maintainNewValueJump = true;
- }
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
- for (std::vector<MachineInstr*>::iterator
- VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE && maintainNewValueJump); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
- // NVJ can not be part of the dual jump - Arch Spec: section 7.8
- if (PacketSU->getInstr()->getDesc().isCall()) {
- Dependence = true;
- break;
- }
- // Validate
- // 1. Packet does not have a store in it.
- // 2. If the first operand of the nvj is newified, and the second
- // operand is also a reg, it (second reg) is not defined in
- // the same packet.
- // 3. If the second operand of the nvj is newified, (which means
- // first operand is also a reg), first reg is not defined in
- // the same packet.
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- // Check #2.
- (!secondRegMatch && NextMI->getOperand(1).isReg() &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(1).getReg(), QRI)) ||
- // Check #3.
- (secondRegMatch &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(0).getReg(), QRI))) {
- Dependence = true;
- break;
- }
+ // NVJ can not be part of the dual jump - Arch Spec: section 7.8
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
}
- if (!Dependence)
- GlueToNewValueJump = true;
- else
- return false;
}
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
}
if (SUJ->isSucc(SUI)) {
@@ -1254,9 +1249,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
else if ((DepType == SDep::Order) &&
!I->hasOrderedMemoryRef() &&
!J->hasOrderedMemoryRef()) {
- if (QRI->Subtarget.hasV4TOps() &&
- // hexagonv4 allows dual store.
- MCIDI.mayStore() && MCIDJ.mayStore()) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore()) {
/* do nothing */
}
// store followed by store-- not OK on V2
@@ -1278,7 +1271,6 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// packetized in a same packet. This implies that the store is using
// caller's SP. Hence, offset needs to be updated accordingly.
else if (DepType == SDep::Data
- && QRI->Subtarget.hasV4TOps()
&& J->getOpcode() == Hexagon::S2_allocframe
&& (I->getOpcode() == Hexagon::S2_storerd_io
|| I->getOpcode() == Hexagon::S2_storeri_io
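The new-value-jump logic above is easier to see as packets. A hedged illustration of the glue case (register numbers invented):

// { r2 = add(r0, r1)                        // feeder, packetized with the jump
//   if (cmp.eq(r2.new, #0)) jump:t target } // new-value jump reads r2.new
// Packetization is refused if the packet already holds a store, an
// S2_allocframe, or an instruction redefining the register the NVJ reads.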
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
deleted file mode 100644
index edbe29a5344a..000000000000
--- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the functions that assign locations to outgoing function
-// arguments. Adapted from the target independent version but this handles
-// calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-//
-
-
-
-
-static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem);
-
-
-static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem) {
- unsigned ByValSize = 0;
- if (ArgFlags.isByVal() &&
- ((ByValSize = ArgFlags.getByValSize()) >
- (MVT(MVT::i64).getSizeInBits() / 8))) {
- ForceMem = true;
- }
-
-
- // Only assign registers for named (non-varargs) arguments
- if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
- NonVarArgsParams))) {
-
- if (LocVT == MVT::i32 ||
- LocVT == MVT::i16 ||
- LocVT == MVT::i8 ||
- LocVT == MVT::f32) {
- static const unsigned RegList1[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
- };
- if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- if (LocVT == MVT::i64 ||
- LocVT == MVT::f64) {
- static const unsigned RegList2[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2
- };
- if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
- }
-
- const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment = State.getTarget()
- .getSubtargetImpl()
- ->getDataLayout()
- ->getABITypeAlignment(ArgTy);
- unsigned Size =
- State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
- ArgTy) /
- 8;
-
- // If it's passed by value, then we need the size of the aggregate not of
- // the pointer.
- if (ArgFlags.isByVal()) {
- Size = ByValSize;
-
- // Hexagon_TODO: Get the alignment of the contained type here.
- Alignment = 8;
- }
-
- unsigned Offset3 = State.AllocateStack(Size, Alignment);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
- LocVT.getSimpleVT(), LocInfo));
- return false;
-}
-
-
-static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem) {
-
- if (LocVT == MVT::i32 ||
- LocVT == MVT::f32) {
- static const unsigned RegList1[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
- };
- if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- if (LocVT == MVT::i64 ||
- LocVT == MVT::f64) {
- static const unsigned RegList2[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2
- };
- if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment = State.getTarget()
- .getSubtargetImpl()
- ->getDataLayout()
- ->getABITypeAlignment(ArgTy);
- unsigned Size =
- State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
- ArgTy) /
- 8;
-
- unsigned Offset3 = State.AllocateStack(Size, Alignment);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
- LocVT.getSimpleVT(), LocInfo));
- return false;
-}
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index 2a6124ee0c5a..4c987ed32a64 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -4,7 +4,7 @@ add_llvm_library(LLVMHexagonDesc
HexagonInstPrinter.cpp
HexagonMCAsmInfo.cpp
HexagonMCCodeEmitter.cpp
- HexagonMCInst.cpp
+ HexagonMCInstrInfo.cpp
HexagonMCTargetDesc.cpp
)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index bdccf880d65f..155aa9ef9557 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -57,7 +57,7 @@ public:
ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
: HexagonAsmBackend(T), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
StringRef CPU("HexagonV4");
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 8e02f799d7e4..6a72f205e9d3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -70,7 +70,7 @@ namespace HexagonII {
PostInc = 6 // Post increment addressing mode
};
- enum MemAccessSize {
+ enum class MemAccessSize {
NoMemAccess = 0, // Not a memory access instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 56c9dc712a6e..fde935b2758b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "hexagon-elf-writer"
@@ -26,8 +27,8 @@ private:
public:
HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
- virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
- bool IsPCRel) const override;
+ unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
};
}
@@ -54,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
return Type;
}
-MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
StringRef CPU) {
MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
-}
\ No newline at end of file
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
new file mode 100644
index 000000000000..4bbfbec883c4
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -0,0 +1,137 @@
+//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Hexagon {
+enum Fixups {
+ // Branch fixups for R_HEX_B{22,15,7}_PCREL.
+ fixup_Hexagon_B22_PCREL = FirstTargetFixupKind,
+ fixup_Hexagon_B15_PCREL,
+ fixup_Hexagon_B7_PCREL,
+ fixup_Hexagon_LO16,
+ fixup_Hexagon_HI16,
+ fixup_Hexagon_32,
+ fixup_Hexagon_16,
+ fixup_Hexagon_8,
+ fixup_Hexagon_GPREL16_0,
+ fixup_Hexagon_GPREL16_1,
+ fixup_Hexagon_GPREL16_2,
+ fixup_Hexagon_GPREL16_3,
+ fixup_Hexagon_HL16,
+ fixup_Hexagon_B13_PCREL,
+ fixup_Hexagon_B9_PCREL,
+ fixup_Hexagon_B32_PCREL_X,
+ fixup_Hexagon_32_6_X,
+ fixup_Hexagon_B22_PCREL_X,
+ fixup_Hexagon_B15_PCREL_X,
+ fixup_Hexagon_B13_PCREL_X,
+ fixup_Hexagon_B9_PCREL_X,
+ fixup_Hexagon_B7_PCREL_X,
+ fixup_Hexagon_16_X,
+ fixup_Hexagon_12_X,
+ fixup_Hexagon_11_X,
+ fixup_Hexagon_10_X,
+ fixup_Hexagon_9_X,
+ fixup_Hexagon_8_X,
+ fixup_Hexagon_7_X,
+ fixup_Hexagon_6_X,
+ fixup_Hexagon_32_PCREL,
+ fixup_Hexagon_COPY,
+ fixup_Hexagon_GLOB_DAT,
+ fixup_Hexagon_JMP_SLOT,
+ fixup_Hexagon_RELATIVE,
+ fixup_Hexagon_PLT_B22_PCREL,
+ fixup_Hexagon_GOTREL_LO16,
+ fixup_Hexagon_GOTREL_HI16,
+ fixup_Hexagon_GOTREL_32,
+ fixup_Hexagon_GOT_LO16,
+ fixup_Hexagon_GOT_HI16,
+ fixup_Hexagon_GOT_32,
+ fixup_Hexagon_GOT_16,
+ fixup_Hexagon_DTPMOD_32,
+ fixup_Hexagon_DTPREL_LO16,
+ fixup_Hexagon_DTPREL_HI16,
+ fixup_Hexagon_DTPREL_32,
+ fixup_Hexagon_DTPREL_16,
+ fixup_Hexagon_GD_PLT_B22_PCREL,
+ fixup_Hexagon_LD_PLT_B22_PCREL,
+ fixup_Hexagon_GD_GOT_LO16,
+ fixup_Hexagon_GD_GOT_HI16,
+ fixup_Hexagon_GD_GOT_32,
+ fixup_Hexagon_GD_GOT_16,
+ fixup_Hexagon_LD_GOT_LO16,
+ fixup_Hexagon_LD_GOT_HI16,
+ fixup_Hexagon_LD_GOT_32,
+ fixup_Hexagon_LD_GOT_16,
+ fixup_Hexagon_IE_LO16,
+ fixup_Hexagon_IE_HI16,
+ fixup_Hexagon_IE_32,
+ fixup_Hexagon_IE_16,
+ fixup_Hexagon_IE_GOT_LO16,
+ fixup_Hexagon_IE_GOT_HI16,
+ fixup_Hexagon_IE_GOT_32,
+ fixup_Hexagon_IE_GOT_16,
+ fixup_Hexagon_TPREL_LO16,
+ fixup_Hexagon_TPREL_HI16,
+ fixup_Hexagon_TPREL_32,
+ fixup_Hexagon_TPREL_16,
+ fixup_Hexagon_6_PCREL_X,
+ fixup_Hexagon_GOTREL_32_6_X,
+ fixup_Hexagon_GOTREL_16_X,
+ fixup_Hexagon_GOTREL_11_X,
+ fixup_Hexagon_GOT_32_6_X,
+ fixup_Hexagon_GOT_16_X,
+ fixup_Hexagon_GOT_11_X,
+ fixup_Hexagon_DTPREL_32_6_X,
+ fixup_Hexagon_DTPREL_16_X,
+ fixup_Hexagon_DTPREL_11_X,
+ fixup_Hexagon_GD_GOT_32_6_X,
+ fixup_Hexagon_GD_GOT_16_X,
+ fixup_Hexagon_GD_GOT_11_X,
+ fixup_Hexagon_LD_GOT_32_6_X,
+ fixup_Hexagon_LD_GOT_16_X,
+ fixup_Hexagon_LD_GOT_11_X,
+ fixup_Hexagon_IE_32_6_X,
+ fixup_Hexagon_IE_16_X,
+ fixup_Hexagon_IE_GOT_32_6_X,
+ fixup_Hexagon_IE_GOT_16_X,
+ fixup_Hexagon_IE_GOT_11_X,
+ fixup_Hexagon_TPREL_32_6_X,
+ fixup_Hexagon_TPREL_16_X,
+ fixup_Hexagon_TPREL_11_X,
+
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+enum FixupBitmaps : unsigned {
+ Word8 = 0xff,
+ Word16 = 0xffff,
+ Word32 = 0xffffffff,
+ Word32_LO = 0x00c03fff,
+ Word32_HL = 0x0, // Not Implemented
+ Word32_GP = 0x0, // Not Implemented
+ Word32_B7 = 0x00001f18,
+ Word32_B9 = 0x003000fe,
+ Word32_B13 = 0x00202ffe,
+ Word32_B15 = 0x00df20fe,
+ Word32_B22 = 0x01ff3ffe,
+ Word32_R6 = 0x000007e0,
+ Word32_U6 = 0x0, // Not Implemented
+ Word32_U16 = 0x0, // Not Implemented
+ Word32_X26 = 0x0fff3fff
+};
+} // namespace Hexagon
+} // namespace llvm
+
+#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
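The FixupBitmaps above record which instruction bits carry each fixup payload; fields like Word32_B22 are non-contiguous. A minimal sketch of applying such a mask, assuming a bit-scatter helper of this shape (the function is illustrative, not part of the patch):

#include <cstdint>

// Scatter the low-order bits of Value into the bit positions set in Mask,
// leaving all other instruction bits untouched. For Word32_B22 (0x01ff3ffe)
// this spreads a 22-bit branch offset across the split immediate field.
static uint32_t applyHexagonFixupMask(uint32_t Insn, uint32_t Value,
                                      uint32_t Mask) {
  uint32_t Result = Insn & ~Mask;       // clear the field being patched
  for (uint32_t Bit = 1; Bit != 0; Bit <<= 1) {
    if (Mask & Bit) {                   // next slot of the scattered field
      if (Value & 1)
        Result |= Bit;
      Value >>= 1;                      // consume one payload bit
    }
  }
  return Result;
}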
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 5c6f0363f78a..15cda717cf1c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -14,7 +14,7 @@
#include "HexagonAsmPrinter.h"
#include "Hexagon.h"
#include "HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -73,50 +73,46 @@ StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
-StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
+void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
}
-void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
- printInst((const HexagonMCInst*)(MI), O, Annot);
-}
-
-void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
- StringRef Annot) {
+void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
// Ending a hardware loop is different from ending a regular packet.
- assert(MI->isPacketEnd() && "Loop-end must also end the packet");
+ assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet");
- if (MI->isPacketBegin()) {
+ if (HexagonMCInstrInfo::isPacketBegin(*MI)) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
- HexagonMCInst Nop;
+ MCInst Nop;
StringRef NoAnnot;
Nop.setOpcode (Hexagon::A2_nop);
- Nop.setPacketBegin (MI->isPacketBegin());
- printInst (&Nop, O, NoAnnot);
+      HexagonMCInstrInfo::setPacketBegin(
+          Nop, HexagonMCInstrInfo::isPacketBegin(*MI));
+      printInst(&Nop, O, NoAnnot, STI);
}
// Close the packet.
- if (MI->isPacketEnd())
+ if (HexagonMCInstrInfo::isPacketEnd(*MI))
O << PacketPadding << endPacket;
printInstruction(MI, O);
}
else {
// Prefix the insn opening the packet.
- if (MI->isPacketBegin())
+ if (HexagonMCInstrInfo::isPacketBegin(*MI))
O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
// Suffix the insn closing the packet.
- if (MI->isPacketEnd())
+ if (HexagonMCInstrInfo::isPacketEnd(*MI))
      // Always end the packet on a new line, since the GNU assembler has
      // issues with a closing brace on the same line as CONST{32,64}.
O << '\n' << PacketPadding << endPacket;
@@ -130,7 +126,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- O << getRegisterName(MO.getReg());
+ printRegName(O, MO.getReg());
} else if(MO.isExpr()) {
O << *MO.getExpr();
} else if(MO.isImm()) {
@@ -192,7 +188,7 @@ void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
- O << getRegisterName(MO0.getReg());
+ printRegName(O, MO0.getReg());
O << " + #" << MO1.getImm();
}
@@ -201,7 +197,8 @@ void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
- O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm();
+ printRegName(O, MO0.getReg());
+ O << ", #" << MO1.getImm();
}
void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
@@ -252,3 +249,17 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
O << ')';
}
+
+void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ const MCInstrDesc &MII = getMII().get(MI->getOpcode());
+
+ assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
+ "Expecting an extendable operand");
+
+ if (MO.isExpr() || isExtended(MII.TSFlags)) {
+ O << "##";
+ }
+ printOperand(MI, OpNo, O);
+}
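+
+// For illustration (hand-written, not tool output), a printed packet from the
+// code above looks like:
+//
+//   {
+//     r2 = add(r2, #1)
+//     r3 = memw(r4 + #0)
+//   }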
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 55ae95cd06df..3fedaed8fbf9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -18,20 +18,18 @@
#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
- class HexagonMCInst;
-
class HexagonInstPrinter : public MCInstPrinter {
public:
- explicit HexagonInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
+ explicit HexagonInstPrinter(MCAsmInfo const &MAI,
+ MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
- StringRef getRegName(unsigned RegNo) const;
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
@@ -58,6 +56,7 @@ namespace llvm {
void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printConstantPool(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 487872a1f5dd..ae3953abba10 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -9,8 +9,9 @@
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonFixupKinds.h"
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -35,10 +36,11 @@ namespace {
/// Possible values for instruction packet parse field.
enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };
/// \brief Returns the packet bits based on instruction position.
-uint32_t getPacketBits(HexagonMCInst const &HMI) {
+uint32_t getPacketBits(MCInst const &HMI) {
unsigned const ParseFieldOffset = 14;
- ParseField Field = HMI.isPacketEnd() ? ParseField::end : ParseField::last0;
- return static_cast <uint32_t> (Field) << ParseFieldOffset;
+ ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end
+ : ParseField::last0;
+ return static_cast<uint32_t>(Field) << ParseFieldOffset;
}
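+// Worked example: ParseField::end is 0x3, so getPacketBits ORs in
+// 0x3 << 14 == 0xC000, i.e. instruction bits 15:14 carry the parse field.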
void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff);
@@ -49,20 +51,455 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
}
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
- MCSubtargetInfo const &aMST,
MCContext &aMCT)
- : MST(aMST), MCT(aMCT) {}
+ : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
+ Extended(new bool(false)) {}
-void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
- HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI);
- uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB);
- assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit");
+ uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI);
+ assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 &&
+ "All instructions should be 32bit");
+ (void)&MCII;
emitLittleEndian(Binary, OS);
++MCNumEmitted;
}
+static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) {
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI);
+
+ if (insnType == HexagonII::TypePREFIX) {
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
+ default:
+ if (MCID.isBranch())
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ else
+ return Hexagon::fixup_Hexagon_32_6_X;
+ }
+ } else if (MCID.isBranch())
+ return (Hexagon::fixup_Hexagon_B13_PCREL);
+
+ switch (MCID.getOpcode()) {
+ case Hexagon::HI:
+ case Hexagon::A2_tfrih:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_HI16;
+ default:
+ return Hexagon::fixup_Hexagon_HI16;
+ }
+
+ case Hexagon::LO:
+ case Hexagon::A2_tfril:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_LO16;
+ default:
+ return Hexagon::fixup_Hexagon_LO16;
+ }
+
+ // The only relocs left should be GP relative:
+ default:
+ if (MCID.mayStore() || MCID.mayLoad()) {
+ for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ ++ImpUses) {
+ if (*ImpUses == Hexagon::GP) {
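+        // The _N suffix is log2 of the access size in bytes; the 16-bit
+        // GP-relative offset is presumably stored shifted right by N.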
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ return fixup_Hexagon_GPREL16_0;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ return fixup_Hexagon_GPREL16_1;
+ case HexagonII::MemAccessSize::WordAccess:
+ return fixup_Hexagon_GPREL16_2;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ return fixup_Hexagon_GPREL16_3;
+ default:
+ llvm_unreachable("unhandled fixup");
+ }
+ }
+ }
+ } else
+ llvm_unreachable("unhandled fixup");
+ }
+
+ return LastTargetFixupKind;
+}
+
+unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ const MCExpr *ME,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const
+
+{
+ int64_t Res;
+
+ if (ME->EvaluateAsAbsolute(Res))
+ return Res;
+
+ MCExpr::ExprKind MK = ME->getKind();
+ if (MK == MCExpr::Constant) {
+ return cast<MCConstantExpr>(ME)->getValue();
+ }
+ if (MK == MCExpr::Binary) {
+ unsigned Res;
+ Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+ Res +=
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+ return Res;
+ }
+
+ assert(MK == MCExpr::SymbolRef);
+
+ Hexagon::Fixups FixupKind =
+ Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16);
+ const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME);
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) -
+ HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind();
+
+ DEBUG(dbgs() << "----------------------------------------\n");
+ DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI)
+ << "\n");
+ DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n");
+ DEBUG(dbgs() << "Relocation bits: " << bits << "\n");
+ DEBUG(dbgs() << "Addend: " << *Addend << "\n");
+ DEBUG(dbgs() << "----------------------------------------\n");
+
+ switch (bits) {
+ default:
+ DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n');
+ break;
+
+ case 32:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_32_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X
+ : Hexagon::fixup_Hexagon_GOTREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_LD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X
+ : Hexagon::fixup_Hexagon_IE_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_IE_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X
+ : Hexagon::fixup_Hexagon_TPREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X
+ : Hexagon::fixup_Hexagon_DTPREL_32;
+ break;
+ default:
+ FixupKind =
+ *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32;
+ break;
+ }
+ break;
+
+ case 22:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
+ break;
+ default:
+ if (MCID.isBranch() || MCID.isCall()) {
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X
+ : Hexagon::fixup_Hexagon_B22_PCREL;
+ } else {
+ errs() << "unrecognized relocation, bits: " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+ }
+ break;
+
+ case 16:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = Hexagon::fixup_Hexagon_IE_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X;
+ break;
+ }
+ } else
+ switch (kind) {
+ default:
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ if ((MCID.getOpcode() == Hexagon::HI) ||
+ (MCID.getOpcode() == Hexagon::LO_H))
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16;
+ else
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL:
+ FixupKind = Hexagon::fixup_Hexagon_GPREL16_0;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LO16:
+ FixupKind = Hexagon::fixup_Hexagon_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_HI16:
+ FixupKind = Hexagon::fixup_Hexagon_HI16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16;
+ break;
+ }
+ break;
+
+ case 15:
+ if (MCID.isBranch() || MCID.isCall())
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X
+ : Hexagon::fixup_Hexagon_B15_PCREL;
+ break;
+
+ case 13:
+ if (MCID.isBranch())
+ FixupKind = Hexagon::fixup_Hexagon_B13_PCREL;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 12:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_12_X;
+ break;
+      // There isn't a GOT_12_X; both 11_X and 16_X resolve to 6/26.
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 11:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 10:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_10_X;
+ break;
+
+ case 9:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X
+ : Hexagon::fixup_Hexagon_B9_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_9_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 8:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_8_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 7:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X
+ : Hexagon::fixup_Hexagon_B7_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_7_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 6:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_6_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X;
+ break;
+      // This is part of an extender; GOT_11 is a
+      // Word32_U6 unsigned/truncated reloc.
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ }
+ } else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 0:
+ FixupKind = getFixupNoBits(MCII, MI, MO, kind);
+ break;
+ }
+
+ MCFixup fixup =
+ MCFixup::create(*Addend, MO.getExpr(), MCFixupKind(FixupKind));
+ Fixups.push_back(fixup);
+ // All of the information is in the fixup.
+ return (0);
+}
+
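+// Tracing the switch above: a plain call to an external symbol has a 22-bit
+// extent and no special variant kind, so the default case selects
+// fixup_Hexagon_B22_PCREL, or fixup_Hexagon_B22_PCREL_X when a constant
+// extender is in effect.
+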
unsigned
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -71,18 +508,16 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
return MCT.getRegisterInfo()->getEncodingValue(MO.getReg());
if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- llvm_unreachable("Only Immediates and Registers implemented right now");
-}
-MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
- return MST;
+  // MO must be an expression (MCExpr).
+ assert(MO.isExpr());
+ return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI);
}
MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT) {
- return new HexagonMCCodeEmitter(MII, MST, MCT);
+ return new HexagonMCCodeEmitter(MII, MCT);
}
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 96048adf34b7..939380af1013 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -26,16 +26,22 @@
namespace llvm {
class HexagonMCCodeEmitter : public MCCodeEmitter {
- MCSubtargetInfo const &MST;
MCContext &MCT;
+ MCInstrInfo const &MCII;
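+  // Encoder scratch state; boxed in unique_ptrs so the const encoding
+  // methods can update it.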
+ std::unique_ptr<unsigned> Addend;
+ std::unique_ptr<bool> Extended;
+
+ // helper routine for getMachineOpValue()
+ unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
+ const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
public:
- HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
- MCContext &aMCT);
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
MCSubtargetInfo const &getSubtargetInfo() const;
- void EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const override;
@@ -51,8 +57,8 @@ public:
MCSubtargetInfo const &STI) const;
private:
- HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
- void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+ HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) = delete;
+ void operator=(HexagonMCCodeEmitter const &) = delete;
}; // class HexagonMCCodeEmitter
} // namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
deleted file mode 100644
index d8b9a2567eeb..000000000000
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some Hexagon VLIW annotations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "HexagonInstrInfo.h"
-#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-
-using namespace llvm;
-
-std::unique_ptr <MCInstrInfo const> HexagonMCInst::MCII;
-
-HexagonMCInst::HexagonMCInst() : MCInst() {}
-HexagonMCInst::HexagonMCInst(MCInstrDesc const &mcid) : MCInst() {}
-
-void HexagonMCInst::AppendImplicitOperands(MCInst &MCI) {
- MCI.addOperand(MCOperand::CreateImm(0));
- MCI.addOperand(MCOperand::CreateInst(nullptr));
-}
-
-std::bitset<16> HexagonMCInst::GetImplicitBits(MCInst const &MCI) {
- SanityCheckImplicitOperands(MCI);
- std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
- return Bits;
-}
-
-void HexagonMCInst::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
- SanityCheckImplicitOperands(MCI);
- MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
-}
-
-void HexagonMCInst::setPacketBegin(bool f) {
- std::bitset<16> Bits(GetImplicitBits(*this));
- Bits.set(packetBeginIndex, f);
- SetImplicitBits(*this, Bits);
-}
-
-bool HexagonMCInst::isPacketBegin() const {
- std::bitset<16> Bits(GetImplicitBits(*this));
- return Bits.test(packetBeginIndex);
-}
-
-void HexagonMCInst::setPacketEnd(bool f) {
- std::bitset<16> Bits(GetImplicitBits(*this));
- Bits.set(packetEndIndex, f);
- SetImplicitBits(*this, Bits);
-}
-
-bool HexagonMCInst::isPacketEnd() const {
- std::bitset<16> Bits(GetImplicitBits(*this));
- return Bits.test(packetEndIndex);
-}
-
-void HexagonMCInst::resetPacket() {
- setPacketBegin(false);
- setPacketEnd(false);
-}
-
-// Return the slots used by the insn.
-unsigned HexagonMCInst::getUnits(const HexagonTargetMachine *TM) const {
- const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo();
- const InstrItineraryData *II =
- TM->getSubtargetImpl()->getInstrItineraryData();
- const InstrStage *IS =
- II->beginStage(QII->get(this->getOpcode()).getSchedClass());
-
- return (IS->getUnits());
-}
-
-MCInstrDesc const& HexagonMCInst::getDesc() const { return (MCII->get(getOpcode())); }
-
-// Return the Hexagon ISA class for the insn.
-unsigned HexagonMCInst::getType() const {
- const uint64_t F = getDesc().TSFlags;
-
- return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
-}
-
-// Return whether the insn is an actual insn.
-bool HexagonMCInst::isCanon() const {
- return (!getDesc().isPseudo() && !isPrefix() &&
- getType() != HexagonII::TypeENDLOOP);
-}
-
-// Return whether the insn is a prefix.
-bool HexagonMCInst::isPrefix() const {
- return (getType() == HexagonII::TypePREFIX);
-}
-
-// Return whether the insn is solo, i.e., cannot be in a packet.
-bool HexagonMCInst::isSolo() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
-}
-
-// Return whether the insn is a new-value consumer.
-bool HexagonMCInst::isNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
-}
-
-// Return whether the instruction is a legal new-value producer.
-bool HexagonMCInst::hasNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
-}
-
-// Return the operand that consumes or produces a new value.
-const MCOperand &HexagonMCInst::getNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- const unsigned O =
- (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
- const MCOperand &MCO = getOperand(O);
-
- assert((isNewValue() || hasNewValue()) && MCO.isReg());
- return (MCO);
-}
-
-// Return whether the instruction needs to be constant extended.
-// 1) Always return true if the instruction has 'isExtended' flag set.
-//
-// isExtendable:
-// 2) For immediate extended operands, return true only if the value is
-// out-of-range.
-// 3) For global address, always return true.
-
-bool HexagonMCInst::isConstExtended(void) const {
- if (isExtended())
- return true;
-
- if (!isExtendable())
- return false;
-
- short ExtOpNum = getCExtOpNum();
- int MinValue = getMinValue();
- int MaxValue = getMaxValue();
- const MCOperand &MO = getOperand(ExtOpNum);
-
- // We could be using an instruction with an extendable immediate and shoehorn
- // a global address into it. If it is a global address it will be constant
- // extended. We do this for COMBINE.
- // We currently only handle isGlobal() because it is the only kind of
- // object we are going to end up with here for now.
- // In the future we probably should add isSymbol(), etc.
- if (MO.isExpr())
- return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
-
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
-}
-
-// Return whether the instruction must be always extended.
-bool HexagonMCInst::isExtended(void) const {
- const uint64_t F = getDesc().TSFlags;
- return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
-}
-
-// Return true if the instruction may be extended based on the operand value.
-bool HexagonMCInst::isExtendable(void) const {
- const uint64_t F = getDesc().TSFlags;
- return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
-}
-
-// Return number of bits in the constant extended operand.
-unsigned HexagonMCInst::getBitCount(void) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
-}
-
-// Return constant extended operand number.
-unsigned short HexagonMCInst::getCExtOpNum(void) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-
-// Return whether the operand can be constant extended.
-bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
- OperandNum;
-}
-
-// Return the min value that a constant extendable operand can have
-// without being extended.
-int HexagonMCInst::getMinValue(void) const {
- const uint64_t F = getDesc().TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned) // if value is signed
- return -1U << (bits - 1);
- else
- return 0;
-}
-
-// Return the max value that a constant extendable operand can have
-// without being extended.
-int HexagonMCInst::getMaxValue(void) const {
- const uint64_t F = getDesc().TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
- else
- return ~(-1U << bits);
-}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
deleted file mode 100644
index ce9a8db5ac44..000000000000
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
-#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
-
-#include "HexagonTargetMachine.h"
-#include "llvm/MC/MCInst.h"
-#include <memory>
-
-extern "C" void LLVMInitializeHexagonTargetMC();
-namespace llvm {
-class MCOperand;
-
-class HexagonMCInst : public MCInst {
- friend void ::LLVMInitializeHexagonTargetMC();
- // Used to access TSFlags
- static std::unique_ptr <MCInstrInfo const> MCII;
-
-public:
- explicit HexagonMCInst();
- HexagonMCInst(const MCInstrDesc &mcid);
-
- static void AppendImplicitOperands(MCInst &MCI);
- static std::bitset<16> GetImplicitBits(MCInst const &MCI);
- static void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
- static void SanityCheckImplicitOperands(MCInst const &MCI) {
- assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
- assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
- "Implicit bits and flags");
- assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
- "Parent pointer");
- }
-
- void setPacketBegin(bool Y);
- bool isPacketBegin() const;
- static const size_t packetBeginIndex = 0;
- void setPacketEnd(bool Y);
- bool isPacketEnd() const;
- static const size_t packetEndIndex = 1;
- void resetPacket();
-
- // Return the slots used by the insn.
- unsigned getUnits(const HexagonTargetMachine *TM) const;
-
- // Return the Hexagon ISA class for the insn.
- unsigned getType() const;
-
- MCInstrDesc const &getDesc() const;
-
- // Return whether the insn is an actual insn.
- bool isCanon() const;
-
- // Return whether the insn is a prefix.
- bool isPrefix() const;
-
- // Return whether the insn is solo, i.e., cannot be in a packet.
- bool isSolo() const;
-
- // Return whether the instruction needs to be constant extended.
- bool isConstExtended() const;
-
- // Return constant extended operand number.
- unsigned short getCExtOpNum(void) const;
-
- // Return whether the insn is a new-value consumer.
- bool isNewValue() const;
-
- // Return whether the instruction is a legal new-value producer.
- bool hasNewValue() const;
-
- // Return the operand that consumes or produces a new value.
- const MCOperand &getNewValue() const;
-
- // Return number of bits in the constant extended operand.
- unsigned getBitCount(void) const;
-
-private:
- // Return whether the instruction must be always extended.
- bool isExtended() const;
-
- // Return true if the insn may be extended based on the operand value.
- bool isExtendable() const;
-
- // Return true if the operand can be constant extended.
- bool isOperandExtended(const unsigned short OperandNum) const;
-
- // Return the min value that a constant extendable operand can have
- // without being extended.
- int getMinValue() const;
-
- // Return the max value that a constant extendable operand can have
- // without being extended.
- int getMaxValue() const;
-};
-}
-
-#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
new file mode 100644
index 000000000000..93c7a0d98bf2
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -0,0 +1,248 @@
+//===- HexagonMCInstrInfo.cpp - Utility functions for Hexagon MCInsts -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These free functions provide Hexagon-specific queries on MCInst objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCInstrInfo.h"
+#include "HexagonBaseInfo.h"
+
+namespace llvm {
+void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) {
+ MCI.addOperand(MCOperand::createImm(0));
+ MCI.addOperand(MCOperand::createInst(nullptr));
+}
+
+HexagonII::MemAccessSize
+HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) &
+ HexagonII::MemAccesSizeMask));
+}
+
+unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (MCII.get(MCI.getOpcode()));
+}
+
+unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask);
+}
+
+unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) {
+ SanityCheckImplicitOperands(MCI);
+ std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
+ return Bits;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1U << (bits - 1));
+ else
+ return ~(-1U << bits);
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1U << (bits - 1);
+ else
+ return 0;
+}
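+
+// Worked example: for a signed 8-bit extent, getMinValue returns
+// -1U << 7 == -128 and getMaxValue returns ~(-1U << 7) == 127; an unsigned
+// 8-bit extent spans 0 .. ~(-1U << 8) == 255.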
+
+char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return MCII.getName(MCI.getOpcode());
+}
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned const O =
+ (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
+ !HexagonMCInstrInfo::isPrefix(MCII, MCI) &&
+ HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For immediate extended operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ if (HexagonMCInstrInfo::isExtended(MCII, MCI))
+ return true;
+
+ if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+ return false;
+
+ short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI);
+ int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ MCOperand const &MO = MCI.getOperand(ExtOpNum);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
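+
+// For example, a signed 8-bit extendable immediate of 500 exceeds the
+// in-range maximum of 127, so isConstExtended() reports true.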
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI,
+ unsigned short OperandNum) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
+ OperandNum;
+}
+
+bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ return Bits.test(packetBeginIndex);
+}
+
+bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ return Bits.test(packetEndIndex);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+void HexagonMCInstrInfo::resetPacket(MCInst &MCI) {
+ setPacketBegin(MCI, false);
+ setPacketEnd(MCI, false);
+}
+
+void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
+ SanityCheckImplicitOperands(MCI);
+ MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
+}
+
+void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ Bits.set(packetBeginIndex, f);
+ SetImplicitBits(MCI, Bits);
+}
+
+void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ Bits.set(packetEndIndex, f);
+ SetImplicitBits(MCI, Bits);
+}
+}
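+
+// A minimal usage sketch (assumed driver code, not part of this patch): the
+// packet flags ride in the extra immediate operand appended to each MCInst.
+//
+//   MCInst MI;
+//   MI.setOpcode(Hexagon::A2_nop);
+//   HexagonMCInstrInfo::AppendImplicitOperands(MI); // imm(0) + inst(nullptr)
+//   HexagonMCInstrInfo::setPacketBegin(MI, true);   // bit packetBeginIndex (0)
+//   HexagonMCInstrInfo::setPacketEnd(MI, true);     // bit packetEndIndex (1)
+//   assert(HexagonMCInstrInfo::isPacketBegin(MI) &&
+//          HexagonMCInstrInfo::isPacketEnd(MI));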
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
new file mode 100644
index 000000000000..082c80d5ac05
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -0,0 +1,122 @@
+//===- HexagonMCInstrInfo.h - Utility functions for Hexagon MCInsts -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon specific MCInst queries
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+#include <bitset>
+
+namespace llvm {
+class MCInstrDesc;
+class MCInstrInfo;
+class MCInst;
+class MCOperand;
+namespace HexagonII {
+enum class MemAccessSize;
+}
+namespace HexagonMCInstrInfo {
+void AppendImplicitOperands(MCInst &MCI);
+
+// Return memory access size
+HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
+ MCInst const &MCI);
+
+// Return number of bits in the constant extended operand.
+unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return constant extended operand number.
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
+
+MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the implicit alignment of the extendable operand
+unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the number of logical bits of the extendable operand
+unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
+
+std::bitset<16> GetImplicitBits(MCInst const &MCI);
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return instruction name
+char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the Hexagon ISA class for the insn.
+unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction is a legal new-value producer.
+bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction needs to be constant extended.
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the insn may be extended based on the operand value.
+bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction must be always extended.
+bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is a new-value consumer.
+bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the operand can be constant extended.
+bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
+ unsigned short OperandNum);
+
+bool isPacketBegin(MCInst const &MCI);
+
+bool isPacketEnd(MCInst const &MCI);
+
+// Return whether the insn is a prefix.
+bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
+
+static const size_t packetBeginIndex = 0;
+static const size_t packetEndIndex = 1;
+
+void resetPacket(MCInst &MCI);
+
+inline void SanityCheckImplicitOperands(MCInst const &MCI) {
+  assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
+  assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
+         "Parent pointer");
+  assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
+         "Implicit bits and flags");
+}
+
+void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+
+void setPacketBegin(MCInst &MCI, bool Y);
+
+void setPacketEnd(MCInst &MCI, bool Y);
+}
+}
+
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index ae5a22bdb01b..59395e230fa9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -14,7 +14,6 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -36,7 +35,7 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "HexagonGenRegisterInfo.inc"
-static MCInstrInfo *createHexagonMCInstrInfo() {
+MCInstrInfo *llvm::createHexagonMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitHexagonMCInstrInfo(X);
return X;
@@ -48,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCStreamer *
-createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
- MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
- return ES;
-}
-
-
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -76,32 +66,25 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCSubtargetInfo const &STI, bool RelaxAll) {
- MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new MCTargetStreamer(*ES);
- return ES;
-}
-
-
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ X->initMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new HexagonInstPrinter(MAI, MII, MRI);
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+    return new HexagonInstPrinter(MAI, MII, MRI);
+ else
+ return nullptr;
}
// Force static initialization.
@@ -116,7 +99,6 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
createHexagonMCInstrInfo);
- HexagonMCInst::MCII.reset (createHexagonMCInstrInfo());
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
@@ -137,7 +119,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the asm backend
TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
createHexagonAsmBackend);
-
- // Register the obj streamer
- TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 02fd5161d24a..de63fd271aea 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,20 +27,22 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheHexagonTarget;
+MCInstrInfo *createHexagonMCInstrInfo();
+
MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
- StringRef CPU);
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, StringRef CPU);
} // End llvm namespace
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
index 329c9d3018f0..04b41e5986ac 100644
--- a/lib/Target/Hexagon/Makefile
+++ b/lib/Target/Hexagon/Makefile
@@ -14,12 +14,12 @@ TARGET = Hexagon
BUILT_SOURCES = HexagonGenRegisterInfo.inc \
HexagonGenInstrInfo.inc \
HexagonGenAsmWriter.inc \
- HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
- HexagonGenCallingConv.inc \
- HexagonGenDFAPacketizer.inc \
- HexagonGenMCCodeEmitter.inc \
- HexagonGenDisassemblerTables.inc
-
-DIRS = TargetInfo MCTargetDesc Disassembler
-
-include $(LEVEL)/Makefile.common
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc \
+ HexagonGenMCCodeEmitter.inc \
+ HexagonGenDisassemblerTables.inc
+
+DIRS = TargetInfo MCTargetDesc Disassembler
+
+include $(LEVEL)/Makefile.common