Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.h | 60
-rw-r--r--  lib/Target/ARM/ARM.td | 76
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 20
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp) | 171
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 134
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 81
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 563
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 29
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 4
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 96
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 359
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 665
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 212
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 371
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 635
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 42
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 296
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 406
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 627
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 19
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 730
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 76
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 139
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp) | 0
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.h (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.h) | 0
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 11
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 160
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 22
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 245
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 66
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 2
-rw-r--r--  lib/Target/ARM/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 10
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 13
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 183
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 66
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 227
-rw-r--r--  lib/Target/ARM/Makefile | 5
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 147
-rw-r--r--  lib/Target/ARM/README.txt | 42
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 279
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 22
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 68
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 14
50 files changed, 5013 insertions, 2484 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 14825a785649..271ca44c2b69 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -30,22 +30,22 @@ class formatted_raw_ostream;
namespace ARMCC {
// The CondCodes constants map directly to the 4-bit encoding of the
// condition field for predicated instructions.
- enum CondCodes {
- EQ,
- NE,
- HS,
- LO,
- MI,
- PL,
- VS,
- VC,
- HI,
- LS,
- GE,
- LT,
- GT,
- LE,
- AL
+ enum CondCodes { // Meaning (integer)          Meaning (floating-point)
+   EQ,            // Equal                      Equal
+   NE,            // Not equal                  Not equal, or unordered
+   HS,            // Carry set                  >, ==, or unordered
+   LO,            // Carry clear                Less than
+   MI,            // Minus, negative            Less than
+   PL,            // Plus, positive or zero     >, ==, or unordered
+   VS,            // Overflow                   Unordered
+   VC,            // No overflow                Not unordered
+   HI,            // Unsigned higher            Greater than, or unordered
+   LS,            // Unsigned lower or same     Less than or equal
+   GE,            // Greater than or equal      Greater than or equal
+   LT,            // Less than                  Less than, or unordered
+   GT,            // Greater than               Greater than
+   LE,            // Less than or equal         <, ==, or unordered
+   AL             // Always (unconditional)     Always (unconditional)
};
inline static CondCodes getOppositeCondition(CondCodes CC) {
@@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory opetion");
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
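As an illustration of the barrier-option mapping added above, here is a minimal standalone sketch that mirrors ARM_MB::MemBOptToString outside of LLVM. The names are re-declared locally; only the enum values and the strings come from the patch itself:

    // Standalone sketch: 4-bit memory barrier option -> mnemonic, as in ARM_MB.
    #include <cstdio>
    enum MemBOpt { ST = 14, ISH = 11, ISHST = 10, NSH = 7, NSHST = 6,
                   OSH = 3, OSHST = 2 };
    static const char *MemBOptToString(unsigned val) {
      switch (val) {
      case ST:    return "st";
      case ISH:   return "ish";
      case ISHST: return "ishst";
      case NSH:   return "nsh";
      case NSHST: return "nshst";
      case OSH:   return "osh";
      case OSHST: return "oshst";
      default:    return "?";   // the LLVM version hits llvm_unreachable here
      }
    }
    int main() {
      std::printf("dmb %s\n", MemBOptToString(ISH)); // prints "dmb ish"
      return 0;
    }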
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index fa64d6c2a4b4..d6a8f19724dc 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,20 +20,6 @@ include "llvm/Target/Target.td"
// ARM Subtarget features.
//
-def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
- "ARM v4T">;
-def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
- "ARM v5T">;
-def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
- "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
- "ARM v6">;
-def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
- "ARM v6t2">;
-def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
- "ARM v7A">;
-def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
- "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
"Enable NEON instructions">;
def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
-def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
// others as well. We should do more benchmarking and confirm one way or
// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
- "Disable VFP MAC instructions">;
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
"true",
"Use NEON for single precision FP">;
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+
+// ARM architectures.
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
+ "ARM v6m",
+ [FeatureNoARM, FeatureDB]>;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2",
+ [FeatureThumb2]>;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A",
+ [FeatureThumb2, FeatureNEON, FeatureDB]>;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M",
+ [FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
+ [ArchV7A, FeatureHasSlowVMLx,
+ FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
-def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
-def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+ [ArchV7A, FeatureT2XtPk]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
//===----------------------------------------------------------------------===//
// Register File Description
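The architecture defs above now carry implied-feature lists (e.g. ArchV7A pulls in FeatureThumb2, FeatureNEON, and FeatureDB), which is what lets the processor definitions shrink to just the architecture plus CPU-specific tuning flags. A minimal C++ sketch of how such implication lists expand transitively — an illustration of the concept only, not TableGen's actual mechanism:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>
    // Hypothetical table mirroring the ArchV7M def above.
    std::map<std::string, std::vector<std::string>> Implies = {
      {"v7m", {"thumb2", "noarm", "db", "hwdiv"}},
    };
    void expand(const std::string &F, std::set<std::string> &Out) {
      if (!Out.insert(F).second) return;          // already expanded
      for (const std::string &Dep : Implies[F]) expand(Dep, Out);
    }
    int main() {
      std::set<std::string> Features;
      expand("v7m", Features);                    // cortex-m3 is just [ArchV7M]
      for (const std::string &F : Features) std::printf("%s ", F.c_str());
      std::printf("\n");
      return 0;
    }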
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 92a13f1d751c..db481005b3a4 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -458,6 +458,7 @@ namespace ARM_AM {
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
@@ -477,14 +478,6 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand encodes the
// operation in bit 8 and the immediate in bits 0-7.
- //
- // This is also used for FP load/store multiple ops. The second operand
- // encodes the number of registers (or 2 times the number of registers
- // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
- // following two sub-modes:
- //
- // IA - Increment after
- // DB - Decrement before
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
@@ -498,17 +491,6 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
- /// VSTM instructions.
- static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
- assert((SubMode == ia || SubMode == db) &&
- "Illegal addressing mode 5 sub-mode!");
- return ((int)SubMode << 8) | Offset;
- }
- static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
- return (AMSubMode)((AM5Opc >> 8) & 0x7);
- }
-
//===--------------------------------------------------------------------===//
// Addressing Mode #6
//===--------------------------------------------------------------------===//
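With the VLDM/VSTM variant removed, addrmode5 is back to the single layout described above: bit 8 holds the add/sub flag and bits 0-7 the immediate offset in 4-byte words. A standalone encode/decode sketch under exactly those assumptions (local re-declarations, not the real ARM_AM helpers):

    #include <cassert>
    // addrmode5 opc layout: bit 8 = add(0)/sub(1), bits 0-7 = offset in words.
    enum AddrOpc { add = 0, sub = 1 };
    unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
      return ((unsigned)(Opc == sub) << 8) | Offset;
    }
    unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
    AddrOpc getAM5Op(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1) ? sub : add; }
    int main() {
      unsigned Opc = getAM5Opc(sub, 4);  // a VFP offset of -16 bytes (4 words)
      assert(getAM5Op(Opc) == sub && getAM5Offset(Opc) == 4);
      return 0;
    }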
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 946f4744f5bb..6cfd5961149f 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,7 +17,7 @@
#include "ARMBuildAttrs.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstPrinter.h"
+#include "AsmPrinter/ARMInstPrinter.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMMCInstLower.h"
#include "ARMTargetMachine.h"
@@ -47,6 +47,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -56,6 +57,15 @@ static cl::opt<bool>
EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
cl::desc("enable experimental asmprinter gunk in the arm backend"));
+namespace llvm {
+ namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+ }
+}
+
namespace {
class ARMAsmPrinter : public AsmPrinter {
@@ -80,9 +90,9 @@ namespace {
virtual const char *getPassName() const {
return "ARM Assembly Printer";
}
-
+
void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
+
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
const char *Modifier = 0);
@@ -110,8 +120,12 @@ namespace {
void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O,
const char *Modifier = 0);
- void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum,
- raw_ostream &O);
+ void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
@@ -190,12 +204,32 @@ namespace {
virtual void EmitInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
-
+
virtual void EmitConstantPool() {} // we emit constant pools customly!
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -208,7 +242,7 @@ namespace {
EmitMachineConstantPoolValue(MCPV, OS);
OutStreamer.EmitRawText(OS.str());
}
-
+
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
raw_ostream &O) {
switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
@@ -234,7 +268,7 @@ namespace {
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
O << *Sym;
-
+
MachineModuleInfoMachO &MMIMachO =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitRawText(OS.str());
}
}
-
+
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
+
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
@@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << " ";
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
O << "#" << lsb << ", #" << width;
}
+void
+ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
//===--------------------------------------------------------------------===//
void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
@@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
@@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
+
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const char *JTEntryDirective = MAI->getData32bitsDirective();
@@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
+
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
O << MAI->getData8bitsDirective();
else if (HalfWordOffset)
O << MAI->getData16bitsDirective();
-
+
if (ByteOffset || HalfWordOffset)
O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
else
@@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
return;
}
-
+
if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
EmitAlignment(2);
-
+
SmallString<128> Str;
raw_svector_ostream OS(Str);
if (MI->getOpcode() == ARM::DBG_VALUE) {
@@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstruction(MI, OS);
OutStreamer.EmitRawText(OS.str());
-
+
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction instead.
if (MI->getOpcode() == ARM::t2TBB)
@@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// avoid out-of-range branches that are due a fundamental limitation of
// the way symbol offsets are encoded with the current Darwin ARM
// relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
+ const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextSection());
@@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
16, SectionKind::getText());
OutStreamer.SwitchSection(sect);
}
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
}
}
@@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
}
-
- if (FiniteOnlyFPMath())
+
+ if (NoInfsFPMath && NoNaNsFPMath)
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
else
@@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// LPC0:
// add r0, pc, r0
// This adds the address of LPC0 to r0.
-
+
// Emit the label.
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
@@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
-
-
+
+
// Form and emit the add.
MCInst AddInst;
AddInst.setOpcode(ARM::ADDrr);
@@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
+
return;
}
case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi);
TmpInst.addOperand(MCOperand::CreateReg(DstReg));
TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
@@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
OutStreamer.EmitInstruction(TmpInst);
}
- return;
+ return;
}
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two instruction sequence.
@@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(V1); // lower16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
TmpInst.addOperand(V2); // upper16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
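Two of the hunks above (printSORegOperand and printT2SOOperand) fix the printing of rrx-shifted operands: rrx always rotates through carry by exactly one bit, so it takes no shift amount, and "r1, rrx #0" must be printed as "r1, rrx". A simplified standalone sketch of that formatting rule (the helper name and the immediate-only signature are inventions for illustration; the real code also handles register-specified shifts):

    #include <cstdio>
    #include <string>
    enum ShiftOpc { no_shift, asr, lsl, lsr, ror, rrx };
    std::string formatShift(ShiftOpc Op, unsigned Imm) {
      static const char *Names[] = {"", "asr", "lsl", "lsr", "ror", "rrx"};
      std::string S = std::string(", ") + Names[Op];
      if (Op != rrx)                   // rrx implies a rotate of 1; no "#imm"
        S += " #" + std::to_string(Imm);
      return S;
    }
    int main() {
      std::printf("r1%s\n", formatShift(lsl, 2).c_str()); // "r1, lsl #2"
      std::printf("r1%s\n", formatShift(rrx, 0).c_str()); // "r1, rrx"
      return 0;
    }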
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 49c16f3e0720..3a8bebe0dd24 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,9 +15,9 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
@@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- switch (MI.getOpcode()) {
- default: break;
- case ARM::VMOVS:
- case ARM::VMOVD:
- case ARM::VMOVDneon:
- case ARM::VMOVQ:
- case ARM::VMOVQQ : {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- case ARM::MOVr:
- case ARM::MOVr_TC:
- case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
- case ARM::t2MOVr: {
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- }
-
- return false;
-}
-
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
@@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Align);
// tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
unsigned PCLabelId = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
if (ACPV->isGlobalValue())
NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
ARMCP::CPValue, 4);
@@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
else if (ACPV->isBlockAddress())
NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::CMPzri:
+ case ARM::t2CMPri:
+ case ARM::t2CMPzri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
+/// that we can remove a "comparison with zero".
+bool ARMBaseInstrInfo::
+ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI;
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || !MO.isDef()) continue;
+
+ // This instruction modifies CPSR before the one we want to change. We
+ // can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MI->RemoveOperand(5);
+ MachineInstrBuilder(MI)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
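The two new hooks above implement a peephole for redundant compares: AnalyzeCompare recognizes a "cmp rN, #imm", and ConvertToSetZeroFlag rewrites the defining ADDri/SUBri (or the t2 forms) to also define CPSR and deletes the compare — roughly turning "sub r0, r0, #1; cmp r0, #0; beq .L" into "subs r0, r0, #1; beq .L", provided nothing between the two instructions writes CPSR. A toy sketch of that safety scan over a simplified block (illustration only; the real code walks MachineInstr operands):

    #include <vector>
    struct Instr { bool DefsCPSR; };
    // Walk backwards from the compare to the defining instruction and bail
    // out if anything in between clobbers the flags.
    bool canFoldCmp(const std::vector<Instr> &BB, int DefIdx, int CmpIdx) {
      for (int i = CmpIdx - 1; i > DefIdx; --i)
        if (BB[i].DefsCPSR)
          return false;
      return true;
    }
    int main() {
      std::vector<Instr> BB = {{false}, {true}, {false}};
      return canFoldCmp(BB, 0, 2) ? 1 : 0;   // flags clobbered at index 1
    }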
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 89a2db74a75e..b4f4a33a70ad 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,11 +15,12 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
/// ARMII - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -97,44 +98,45 @@ namespace ARMII {
// Miscellaneous arithmetic instructions
ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
// Extend instructions
- ExtFrm = 13 << FormShift,
+ ExtFrm = 14 << FormShift,
// VFP formats
- VFPUnaryFrm = 14 << FormShift,
- VFPBinaryFrm = 15 << FormShift,
- VFPConv1Frm = 16 << FormShift,
- VFPConv2Frm = 17 << FormShift,
- VFPConv3Frm = 18 << FormShift,
- VFPConv4Frm = 19 << FormShift,
- VFPConv5Frm = 20 << FormShift,
- VFPLdStFrm = 21 << FormShift,
- VFPLdStMulFrm = 22 << FormShift,
- VFPMiscFrm = 23 << FormShift,
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
// Thumb format
- ThumbFrm = 24 << FormShift,
+ ThumbFrm = 25 << FormShift,
// Miscelleaneous format
- MiscFrm = 25 << FormShift,
+ MiscFrm = 26 << FormShift,
// NEON formats
- NGetLnFrm = 26 << FormShift,
- NSetLnFrm = 27 << FormShift,
- NDupFrm = 28 << FormShift,
- NLdStFrm = 29 << FormShift,
- N1RegModImmFrm= 30 << FormShift,
- N2RegFrm = 31 << FormShift,
- NVCVTFrm = 32 << FormShift,
- NVDupLnFrm = 33 << FormShift,
- N2RegVShLFrm = 34 << FormShift,
- N2RegVShRFrm = 35 << FormShift,
- N3RegFrm = 36 << FormShift,
- N3RegVShFrm = 37 << FormShift,
- NVExtFrm = 38 << FormShift,
- NVMulSLFrm = 39 << FormShift,
- NVTBLFrm = 40 << FormShift,
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -198,7 +200,7 @@ namespace ARMII {
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
- const ARMSubtarget& Subtarget;
+ const ARMSubtarget &Subtarget;
protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
@@ -223,7 +225,7 @@ public:
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -262,12 +264,6 @@ public:
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -341,6 +337,17 @@ public:
unsigned NumInstrs) const {
return NumInstrs && NumInstrs == 1;
}
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpValue) const;
+
+ /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
+ MachineInstr *CmpInstr) const;
};
static inline
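The wholesale renumbering above is forced by how formats are stored: each format is a small integer packed into TSFlags at FormShift, so inserting SatFrm as 13 pushes every later format code up by one. A standalone sketch of the packing (the FormShift/FormMask values below are illustrative placeholders, not copied from this header):

    #include <cstdio>
    enum { FormShift = 7, FormMask = 0x3f << FormShift }; // illustrative layout
    constexpr unsigned SatFrm = 13 << FormShift;
    constexpr unsigned ExtFrm = 14 << FormShift;          // was 13 << FormShift
    unsigned getForm(unsigned TSFlags) {
      return (TSFlags & FormMask) >> FormShift;
    }
    int main() {
      std::printf("SatFrm=%u ExtFrm=%u\n", getForm(SatFrm), getForm(ExtFrm));
      return 0;
    }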
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 182bd9937145..eceafad63f17 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,13 +40,20 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
-cl::opt<bool>
-ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
- cl::desc("Reuse repeated frame index values"));
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
}
using namespace llvm;
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
}
const unsigned*
@@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
- if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(ARM::FPSCR);
+ if (hasFP(MF))
Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
@@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
case ARM::SP:
case ARM::PC:
return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- !MFI->hasVarSizedObjects());
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
}
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- (MFI->getMaxAlignment() > StackAlign) &&
- !MFI->hasVarSizedObjects());
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
}
bool ARMBaseRegisterInfo::
@@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
return Limit;
}
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
void
ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ // Spill the BasePtr if it's used.
+ if (hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(BasePtr);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
bool ForceLRSpill = false;
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
// use of BL to implement far jump. If it turns out that it's not needed
// then the branch fix up path will undo it.
@@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// slot of the previous FP. Also, if we have variable sized objects in the
// function, stack slot references will often be negative, and some of
// our instructions are positive-offset only, so conservatively consider
- // that case to want a spill slot (or register) as well.
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned
+unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
+// Provide a base+offset reference to an FI slot for debug info. It's the
+// same as what we use for resolving the code-gen references for now.
+// FIXME: This can go wrong when references are SP-relative and simple call
+// frames aren't used.
int
ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
bool isFixed = MFI->isFixedObjectIndex(FI);
FrameReg = ARM::SP;
+ Offset += SPAdj;
if (AFI->isGPRCalleeSavedArea1Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (needsStackRealignment(MF)) {
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = BasePtr;
}
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) && "missing base pointer!");
+ // Use the base register since we have it.
+ FrameReg = BasePtr;
} else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references.
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
+ return FPOffset;
}
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = getFrameRegister(MF);
+ return FPOffset;
}
}
+ // Use the base pointer if we have one.
+ if (hasBasePointer(MF))
+ FrameReg = BasePtr;
return Offset;
}
-
int
ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
@@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
case ARM::R5:
return ARM::R4;
case ARM::R7:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
case ARM::R9:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
case ARM::R11:
@@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
case ARM::R4:
return ARM::R5;
case ARM::R6:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
case ARM::R8:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
case ARM::R10:
@@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
// not required, we reserve argument space for call sites in the function
// immediately on entry to the current function. This eliminates the need for
// add/sub sp brackets around call sites. Returns true if the call frame is
// included as part of the stack frame.
bool ARMBaseRegisterInfo::
-hasReservedCallFrame(MachineFunction &MF) const {
+hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const {
// is not sufficient here since we still may reference some objects via SP
// even when FP is available in Thumb2 mode.
bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(MachineFunction &MF) const {
+canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
+ case ARM::STR: case ARM::STRH: case ARM::STRB:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+  // Without a virtual base register, if the function has variable sized
+  // objects, all fixed-size local references will be via the frame pointer.
+  // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+  // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
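To make the heuristic concrete, here is a minimal numeric sketch of the two estimates above, under assumed inputs (an entry-SP-relative offset of -40, a 24-byte local frame, a non-Thumb1 function); the constants are the same 8, 80, and 128 used in the pass:

    #include <cstdint>

    // Worked example of the FP- and SP-relative estimates; assumptions
    // as stated in the lead-in. Not LLVM code, arithmetic only.
    void estimateFrameOffsets() {
      int64_t Offset = -40;           // SP-relative at function entry
      int64_t FPOffset = Offset - 8;  // R7 + LR pushed before the FP
      bool IsThumb1Only = false;
      if (!IsThumb1Only)
        FPOffset -= 80;               // R8-R11 and D8-D15, worst case
      int64_t SPOffset = -Offset;
      SPOffset += 24;                 // local frame allocation
      SPOffset += 128;                // assumed spill-slot estimate
      // FPOffset == -128 and SPOffset == 192 for these inputs; each is
      // then tested with isFrameOffsetLegal before giving up.
    }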
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for
+/// BaseReg to be a pointer to FrameIdx before insertion point I.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
+ int FrameIdx, int64_t Offset) const {
+ ARMFunctionInfo *AFI =
+ I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
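The opcode selection above is a three-way split on the subtarget mode; a compact sketch, with booleans standing in for the ARMFunctionInfo queries:

    // Sketch only: plain ARM adds with ADDri, Thumb1 with the
    // SP-relative tADDrSPi, and Thumb2 with t2ADDri. The integer codes
    // are placeholders for the ARM::* opcode enumerators.
    unsigned pickFrameBaseAddOpc(bool IsThumb, bool IsThumb1Only) {
      if (!IsThumb)     return 0; // ARM::ADDri
      if (IsThumb1Only) return 1; // ARM::tADDrSPi
      return 2;                   // ARM::t2ADDri
    }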
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+  assert(Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+    assert(i < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+    // i8 supports only negative offsets and i12 only positive ones, so
+    // pick the appropriate instruction based on the offset's sign.
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
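The tail of isFrameOffsetLegal is a generic encodability test; factored out as a standalone sketch with the same meaning:

    #include <cstdint>

    // Restatement of the final check above: the offset must be a
    // multiple of the scale, and its magnitude must fit the scaled
    // NumBits-wide immediate field. Signed modes store a magnitude plus
    // a direction bit, hence the negation.
    bool fitsImmediateField(int64_t Offset, unsigned NumBits,
                            unsigned Scale, bool IsSigned) {
      if ((Offset & (Scale - 1)) != 0)
        return false;                  // not scale-aligned
      if (IsSigned && Offset < 0)
        Offset = -Offset;
      uint64_t Mask = (1u << NumBits) - 1;
      return (uint64_t)Offset <= Mask * Scale;
    }
    // AddrMode5 (NumBits=8, Scale=4): 1020 is encodable, 1024 is not.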
+
+void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned FrameReg;
- int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
- if (FrameReg != ARM::SP)
- SPAdj = 0;
- Offset += SPAdj;
+ int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
@@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
}
if (Done)
- return 0;
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
@@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
- if (Value) {
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
- }
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
@@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (!ReuseFrameIndexVals)
- ScratchReg = 0;
}
- return ScratchReg;
}
/// Move iterator past the next bunch of callee save load / store ops for
@@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
- // If we need dynamic stack realignment, do it here.
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+  // sure that, if we also have VLAs, we have a base pointer for frame access.
if (needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
@@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
+ .addReg(ARM::SP);
}
+
+  // If the frame has variable-sized objects, then the epilogue must
+  // restore the SP from the FP.
+ if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
}
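Taken together, the prologue above sets the restore-SP-from-FP flag under four conditions; condensed into one predicate (a sketch, with booleans standing in for the MFI/AFI queries):

    // Sketch of when the epilogue must rebuild SP from FP, per the
    // setShouldRestoreSPFromFP calls above.
    bool shouldRestoreSPFromFP(bool HasFP, bool AllocatedLocals,
                               bool IsELF, bool Realigned, bool HasVLAs) {
      return (HasFP && AllocatedLocals) || // locals below the saves
             (IsELF && HasFP)           || // ELF offset adjustment
             Realigned                  || // dynamic stack realignment
             HasVLAs;                      // variable sized objects
    }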
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
+ BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
@@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
} else if (RetOpcode == ARM::TCRETURNriND) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
addReg(JumpTarget.getReg(), RegState::Kill);
- }
+ }
MachineInstr *NewMI = prior(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index f7ee0d5cc66d..fa2eb6c10498 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) {
}
}
-struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
@@ -52,6 +52,11 @@ protected:
/// FramePtr - ARM physical register used as frame ptr.
unsigned FramePtr;
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
// Can be only subclassed.
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &STI);
@@ -102,9 +107,18 @@ public:
MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
bool cannotEliminateFrame(const MachineFunction &MF) const;
@@ -116,6 +130,8 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
// Exception handling queries.
@@ -144,16 +160,17 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- virtual bool hasReservedCallFrame(MachineFunction &MF) const;
- virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8fdb07f81626..293e32aa5376 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
- CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>,
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>,
CCIfType<[v2f64], CCAssignToStack<16, 8>>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7895cb071922..b1a702f90cfc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -65,7 +65,7 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
@@ -124,6 +124,8 @@ namespace {
void emitMiscArithInstruction(const MachineInstr &MI);
+ void emitSaturateInstruction(const MachineInstr &MI);
+
void emitBranchInstruction(const MachineInstr &MI);
void emitInlineJumpTable(unsigned JTIndex);
@@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::ArithMiscFrm:
emitMiscArithInstruction(MI);
break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
case ARMII::BrFrm:
emitBranchInstruction(MI);
break;
@@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX:
+ case ARM::BMOVPCRX:
+ case ARM::BXr9:
+ case ARM::BMOVPCRXr9: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
break;
}
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
@@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
// Encode shift_imm.
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (TID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
Binary |= ShiftAmt << ARMII::ShiftShift;
emitWordLE(Binary);
}
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ TID.Opcode != ARM::SSAT16 &&
+ TID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (TID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
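The position bias above follows from the instruction semantics: SSAT/SSAT16 saturate to a width of 1..32 (1..16 for the halfword form) and encode width minus one, while USAT/USAT16 take 0..31 (0..15) directly. A sketch of just that step:

    #include <cassert>

    // Sketch of the saturate-position field computation above. Returns
    // the value shifted into place; the field starts at bit 16.
    unsigned encodeSatPos(unsigned Pos, bool IsSigned, bool IsHalfword) {
      if (IsSigned)
        Pos -= 1;                   // SSAT/SSAT16 encode position - 1
      unsigned Limit = IsHalfword ? 16 : 32;
      assert(Pos < Limit && "saturate bit position out of range");
      return Pos << 16;
    }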
+
void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// Set addressing mode by modifying bits U(23) and P(24)
const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
// Set bit W(21)
if (IsUpdating)
@@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// First register is encoded in Dd.
Binary |= encodeVFPRd(MI, OpIdx+2);
- // Number of registers are encoded in offset field.
+ // Count the number of registers.
unsigned NumRegs = 1;
for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 65a3da6f1617..60e923bd2c38 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,9 +18,9 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -165,7 +165,7 @@ namespace {
/// HasInlineAsm - True if the function contains inline assembly.
bool HasInlineAsm;
- const TargetInstrInfo *TII;
+ const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -173,7 +173,7 @@ namespace {
bool isThumb2;
public:
static char ID;
- ARMConstantIslands() : MachineFunctionPass(&ID) {}
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() {
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
MachineConstantPool &MCP = *MF.getConstantPool();
- TII = MF.getTarget().getInstrInfo();
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
@@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// constant pool users.
InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
+ DEBUG(dumpBBs());
+
/// Remove dead constant pool entries.
RemoveUnusedCPEntries();
@@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
// Shrink 32-bit Thumb2 branch, load, and store instructions.
- if (isThumb2)
+ if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= OptimizeThumb2Instructions(MF);
// After a while, this might be made debug-only, but it is not expensive.
@@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ DEBUG(errs() << '\n'; dumpBBs());
+
BBSizes.clear();
BBOffsets.clear();
WaterList.clear();
@@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::tBR_JTr:
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+      // table entries. So this code checks whether the offset of
+      // tBR_JTr + 2 is aligned. That is held in Offset+MBBSize, which
+      // already has 2 added in for the size of the mov pc instruction.
MF.EnsureAlignment(2U);
if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
@@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- // Figure out how large the first NewMBB is. (It cannot
- // contain a constpool_entry or tablejump.)
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
-
unsigned OrigBBI = OrigBB->getNumber();
unsigned NewBBI = NewBB->getNumber();
- // Set the size of NewBB in BBSizes.
- BBSizes[NewBBI] = NewBBSize;
- // We removed instructions from UserMBB, subtract that off from its size.
- // Add 2 or 4 to the block to count the unconditional branch we added to it.
int delta = isThumb1 ? 2 : 4;
- BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
// ...and adjust BBOffsets for NewBB accordingly.
BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
// All BBOffsets following these blocks must be modified.
- AdjustBBOffsetsAfter(NewBB, delta);
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
return NewBB;
}
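The delta bookkeeping above works out to a simple rule for Thumb1 tablejumps: the 2-byte branch inserted by the split flips the jump's alignment, so a jump that previously needed padding no longer does (net growth 0), and one that did not now does (net growth 4). As a sketch:

    // Net code-size change when a 2-byte branch is inserted ahead of a
    // tBR_JTr whose original offset is OrigOffset (sketch of the logic
    // above).
    unsigned splitDelta(unsigned OrigOffset) {
      // Aligned offset: the jump had 2 bytes of padding and loses it,
      // cancelling the new branch. Unaligned: it gains 2 bytes of
      // padding on top of the 2-byte branch.
      return (OrigOffset % 4 == 0) ? 0u : 4u;
    }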
@@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
}
// Thumb1 jump tables require padding. They should be at the end;
// following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+  // table entries. So this code checks whether the offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
MachineInstr *ThumbJTMI = prior(MBB->end());
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
@@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
U.MaxDisp, U.NegOk, U.IsSoImm)) {
@@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
CPUIndex++;
}
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
}
+
DEBUG(errs() << "Split in middle of big block\n");
- NewMBB = SplitBlockBeforeInstr(prior(MI));
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
}
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9c62597b4323..fc2e3c3fadae 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,14 +19,21 @@
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
+ // Constants for register spacing in NEON load/store instructions.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
public:
static char ID;
- ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -41,6 +48,10 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
};
char ARMExpandPseudo::ID = 0;
}
@@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
}
}
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
+ }
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (hasWriteBack) {
+ bool WBIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned WBReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ // For an instruction writing the odd subregs, add an implicit use of the
+ // super-register because the even subregs were loaded separately.
+ if (RegSpc == OddDblSpc)
+ MIB.addReg(DstReg, RegState::Implicit);
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ MI.eraseFromParent();
+}
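The three spacings reduce to a fixed map from register slot to dsub_* index; the same if/else chain appears again in ExpandVST below. A table form of the selection, with indices 0..7 standing in for ARM::dsub_0..dsub_7:

    // Sketch of the spacing-to-subregister selection above.
    enum NEONRegSpacing { SingleSpc, EvenDblSpc, OddDblSpc };

    static const unsigned SubRegIdx[3][4] = {
      {0, 1, 2, 3}, // SingleSpc:  consecutive D registers
      {0, 2, 4, 6}, // EvenDblSpc: even D halves of a QQ/QQQQ register
      {1, 3, 5, 7}, // OddDblSpc:  odd D halves of a QQ/QQQQ register
    };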
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+ if (hasWriteBack) {
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
+ }
+
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ MI.eraseFromParent();
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ bool ModifiedOp = true;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
+ default:
+ ModifiedOp = false;
+ break;
+
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- Modified = true;
break;
}
+ case ARM::MOVi32imm:
case ARM::t2MOVi32imm: {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
@@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
const MachineOperand &MO = MI.getOperand(1);
MachineInstrBuilder LO16, HI16;
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVi16 : ARM::t2MOVi16),
DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVTi16 : ARM::t2MOVTi16))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(DstReg);
@@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
- Modified = true;
break;
}
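For the plain-immediate case, the expansion above follows the standard MOVW/MOVT idiom: the 32-bit value is split into two 16-bit halves written to the same destination register (global-address operands are handled in the elided lines). A minimal sketch of the split itself:

    #include <cstdint>

    // Halves of a 32-bit immediate as consumed by MOVi16/MOVTi16 (ARM)
    // or t2MOVi16/t2MOVTi16 (Thumb2).
    void splitImm32(uint32_t Imm, uint16_t &Lo16, uint16_t &Hi16) {
      Lo16 = Imm & 0xffffu;          // MOVW payload, low half
      Hi16 = (Imm >> 16) & 0xffffu;  // MOVT payload, high half
    }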
@@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
- Modified = true;
}
+
+ case ARM::VLD1q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
+ case ARM::VLD1q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
+
+ case ARM::VLD2d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
+ case ARM::VLD2d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
+
+ case ARM::VLD3d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
+ case ARM::VLD3d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VLD4d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
+ case ARM::VLD4d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+
+ case ARM::VST1q8Pseudo:
+ ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo:
+ ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo:
+ ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo:
+ ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
+ case ARM::VST1q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
+
+ case ARM::VST2d8Pseudo:
+ ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo:
+ ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo:
+ ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo:
+ ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo:
+ ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo:
+ ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
+ case ARM::VST2d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
+
+ case ARM::VST3d8Pseudo:
+ ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo:
+ ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo:
+ ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo:
+ ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
+ case ARM::VST3d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VST4d8Pseudo:
+ ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo:
+ ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo:
+ ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo:
+ ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
+ case ARM::VST4d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
}
+
+ if (ModifiedOp)
+ Modified = true;
MBBI = NMBBI;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..4892eae95833
--- /dev/null
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,665 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMFastISel("arm-fast-isel",
+ cl::desc("Turn on experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const ARMFunctionInfo *AFI;
+
+ // Convenience variable to avoid checking all the time.
+ bool isThumb;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ virtual bool ARMSelectLoad(const Instruction *I);
+ virtual bool ARMSelectStore(const Instruction *I);
+ virtual bool ARMSelectBranch(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, EVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, EVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
+ bool ARMLoadAlloca(const Instruction *I);
+ bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
+ bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
+
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+// #include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ // TODO: This should be safe for fp because they're just bits from the
+ // Constant.
+ // TODO: Theoretically we could materialize fp constants with instructions
+ // from VFP3.
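+  // (VFP3's VMOV immediate form can materialize the limited set of FP
+  // constants representable in its 8-bit encoding.)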
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ // Different addressing modes between ARM/Thumb2 for constant pool loads.
+  // DestReg is the load's def, so pass it to BuildMI directly rather than
+  // adding it with addReg() (which would mark it as a use).
+  if (isThumb)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::t2LDRpci), DestReg)
+                    .addConstantPoolIndex(Idx));
+  else
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::LDRcp), DestReg)
+                    .addConstantPoolIndex(Idx)
+                    .addReg(0).addImm(0));
+
+ return DestReg;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
+ VT = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (VT == MVT::Other || !VT.isSimple()) return false;
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic
+  // operation, go ahead and accept it now.
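+  // (i8 and i16 loads are then emitted as byte/halfword loads into a full
+  // 32-bit register; see ARMEmitLoad below.)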
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
+ int &Offset) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+    //errs() << "Failing Opcode is: " << *Obj << "\n";
+ break;
+ case Instruction::Alloca: {
+ assert(false && "Alloca should have been handled earlier!");
+ return false;
+ }
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ //errs() << "Failing GV is: " << GV << "\n";
+ (void)GV;
+ return false;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ Reg = getRegForValue(Obj);
+ if (Reg == 0) return false;
+
+  // Since the offset may be too large for the load instruction, get the
+  // reg+offset into a register.
+ // TODO: Verify the additions work, otherwise we'll need to add the
+ // offset instead of 0 to the instructions and do all sorts of operand
+ // munging.
+ // TODO: Optimize this somewhat.
+ if (Offset != 0) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ ResultReg, SI->second, RC,
+ TM.getRegisterInfo());
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
+ unsigned Reg, int Offset) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Trying to emit for an unhandled type!");
+ return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
+ VT = MVT::i32;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
+ VT = MVT::i32;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tLDR : ARM::LDR;
+ break;
+ }
+
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // TODO: Fix the Addressing modes so that these can share some code.
+  // Since these are Thumb1 load opcodes, this works in both Thumb1 and
+  // Thumb2 mode.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
+ Value *Op1 = I->getOperand(1);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ assert(SrcReg != 0 && "Nothing to store!");
+ TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ SrcReg, true /*isKill*/, SI->second, RC,
+ TM.getRegisterInfo());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+ unsigned DstReg, int Offset) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
+ case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
+ case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+
+  // The stored value is a use, not a def, so add SrcReg with addReg()
+  // rather than as the BuildMI destination register.
+  if (isThumb)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(StrOpc))
+                    .addReg(SrcReg).addReg(DstReg).addImm(Offset).addReg(0));
+  else
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(StrOpc))
+                    .addReg(SrcReg).addReg(DstReg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+  // Verify that the type of the stored value is legal.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+  // If we're storing to an alloca we know we have a frame index and can
+  // emit the store quickly.
+ if (ARMStoreAlloca(I, SrcReg))
+ return true;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+ return false;
+
+  if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+
+  // The store was emitted successfully.
+  return true;
+}
+
+bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
+  // If we're loading from an alloca we know we have a frame index and can
+  // emit the load directly in short order.
+ if (ARMLoadAlloca(I))
+ return true;
+
+ // Verify we have a legal type before going any further.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0) return false;
+
+  // Compare the condition against zero; the conditional branch below is
+  // taken when it is nonzero. (Comparing CondReg against itself would
+  // always set Z, so the NE branch would never be taken.)
+  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CondReg).addImm(0));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+ // No Thumb-1 for now.
+ if (isThumb && !AFI->isThumb2Function()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return ARMSelectLoad(I);
+ case Instruction::Store:
+ return ARMSelectStore(I);
+ case Instruction::Br:
+ return ARMSelectBranch(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..85b0c6c248d0
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all of
+// the globals that were merged can be addressed using offsets from a single
+// base pointer (there is no need for a separate base pointer for each
+// global). Such a transformation can significantly reduce register pressure
+// when many globals are involved.
+//
+// For example, consider code that touches several global variables at once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM, the addresses of all 3 arrays must be kept in registers, so the
+// loop body has quite high register pressure:
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+ class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+    /// TLI - Keep a pointer to a TargetLowering object to consult for
+    /// determining target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function& F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td):
+ TD(td) { }
+
+ bool operator() (const GlobalVariable* GV1,
+ const GlobalVariable* GV2) {
+ const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these maximum offsets differ for users inside Thumb functions vs. ARM
+  // functions).
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
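+  // Greedily pack the size-sorted globals into one merged struct after
+  // another; a struct is closed once its accumulated size reaches the
+  // maximal offset addressable from a single base pointer.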
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<const Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; MergedSize < MaxOffset && j != e; ++j) {
+ const Type* Ty = Globals[j]->getType()->getElementType();
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ MergedSize += TD->getTypeAllocSize(Ty);
+ }
+
+ StructType* MergedTy = StructType::get(M.getContext(), Tys);
+ Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "merged");
+ for (size_t k = i; k < j; ++k) {
+ SmallVector<Constant*, 2> Idx;
+ Idx.push_back(ConstantInt::get(Int32Ty, 0));
+ Idx.push_back(ConstantInt::get(Int32Ty, k-i));
+
+ Constant* GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedGV,
+ &Idx[0], Idx.size());
+
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+  // Collect candidate globals, separating constants from non-constants.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+    // Merging is safe only for "normal" internal globals.
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ if (I->getAlignment() != 0)
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+    // Use the size of the pointee; the global's own type is a pointer.
+    if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+ if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+  // FIXME: This currently breaks EH processing due to the way typeinfo
+  // detection works. We might want to detect the typeinfos and ignore
+  // them in the future.
+
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c84d3ff81324..51a30c158dd1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -36,6 +36,11 @@
using namespace llvm;
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -113,6 +118,16 @@ public:
bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
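+  // A "so_imm" is an ARM shifter-operand immediate: an 8-bit value rotated
+  // right by an even amount. A "t2_so_imm" is the corresponding Thumb2
+  // modified-immediate encoding.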
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
+
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
@@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
break;
}
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
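+  // (A 3-register list is rounded up to the next power-of-two-sized
+  // super-register here; the unused slot is never extracted afterwards.)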
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (NumVecs < 2)
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
return VLd;
- SDValue RegSeq;
- SDValue V0 = SDValue(VLd, 0);
- SDValue V1 = SDValue(VLd, 1);
-
- // Form a REG_SEQUENCE to force register allocation.
- if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- else {
- SDValue V2 = SDValue(VLd, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLd, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
+ SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, RegSeq);
+ dl, VT, SuperReg);
ReplaceUses(SDValue(N, Vec), D);
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(2 * NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 2 * NumVecs);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
- // Combine the even and odd subregs to produce the result.
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
- Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
- Chain = SDValue(VLdB, NumVecs+1);
-
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
+
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
@@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ if (Pred_t2_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
+ if (Pred_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
@@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
-
- // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
- MVT::v2f64, MVT::Other, Ops, 5);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
- // Other cases are autogenerated.
- break;
- }
- case ISD::STORE: {
- // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
- AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
// Other cases are autogenerated.
break;
}
@@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD1d64T };
- unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
- ARM::VLD3q16_UPD,
- ARM::VLD3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
- ARM::VLD3q16odd_UPD,
- ARM::VLD3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD1d64Q };
- unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
- ARM::VLD4q16_UPD,
- ARM::VLD4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
- ARM::VLD4q16odd_UPD,
- ARM::VLD4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST1d64T };
- unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
- ARM::VST3q16_UPD,
- ARM::VST3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
- ARM::VST3q16odd_UPD,
- ARM::VST3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST1d64Q };
- unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
- ARM::VST4q16_UPD,
- ARM::VST4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
- ARM::VST4q16odd_UPD,
- ARM::VST4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0091df753eb7..ce4a2c90689c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(true));
+ cl::init(false));
+
+// This option should go away when Machine LICM is smart enough to hoist a
+// reg-to-reg VDUP.
+static cl::opt<bool>
+EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
+ cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
+ cl::init(false));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
@@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
}
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
- // use the default expansion.
- bool canHandleAtomics =
- (Subtarget->hasV7Ops() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
- if (canHandleAtomics) {
+  // ARMv6 Thumb1 (except on CPUs that support dmb / dsb) and earlier targets
+  // use the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasV6T2Ops())
+ setTargetDAGCombine(ISD::OR);
+
setStackPointerRegisterToSaveRestore(ARM::SP);
if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+  // Use DPR as the representative register class for all floating-point
+  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
+  // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
@@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::AND: return "ARMISD::AND";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
@@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
}
}
@@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
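+/// (4095 corresponds to the 12-bit immediate offset of ARM-mode loads and
+/// stores; 127 is a conservative bound for the much smaller Thumb1 ranges.)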
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
@@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
if (HiRegList[i] == Reg)
break;
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
LocVT, LocInfo));
@@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // v6 and v7 can both handle barriers directly, but need handled a bit
- // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+  // Subtargets which have dmb and dsb instructions can handle barriers
+  // directly. Some ARMv6 CPUs can support them with the help of an mcr
+  // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasV7Ops())
+ if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ else {
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
- assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return SDValue();
+ }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
- EVT VT = Node->getValueType(0);
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- SDValue Align = Op.getOperand(2);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
-
- unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
- if (AlignVal > StackAlign)
- // Do this now since selection pass cannot introduce new target
- // independent node.
- Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
-
- // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
- // using a "add r, sp, r" instead. Negate the size now so we don't have to
- // do even more horrible hack later.
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->isThumb1OnlyFunction()) {
- bool Negate = true;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
- if (C) {
- uint32_t Val = C->getZExtValue();
- if (Val <= 508 && ((Val & 3) == 0))
- Negate = false;
- }
- if (Negate)
- Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
- }
-
- SDVTList VTList = DAG.getVTList(VT, MVT::Other);
- SDValue Ops1[] = { Chain, Size, Align };
- SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
- Chain = Res.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue());
- SDValue Ops2[] = { Res, Chain };
- return DAG.getMergeValues(Ops2, 2, dl);
-}
-
-SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
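+    // Guard against wraparound when adjusting the constant: for SETLT and
+    // SETGE, C-1 overflows when C == INT_MIN, and "x < INT_MIN" is always
+    // false anyway. The analogous guards below protect the other rewrites.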
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalICmpImmediate(C-1)) {
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
@@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
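+  // This folds away the materialization of the intermediate 0/1 value and
+  // selects on the original condition directly.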
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
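+  // In the general case, select(c, t, f) is lowered as
+  // select_cc(c, 0, t, f, setne).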
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
bool SeenZero = false;
if (canChangeToInt(LHS, SeenZero, Subtarget) &&
canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case.
- (FiniteOnlyFPMath() || SeenZero)) {
+ // If one of the operand is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and the AND get folded into a bitfield extract.
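+  // For example, FPSCR rounding mode 3 (round toward zero) yields
+  // ((3 + 1) & 3) = 0, which is the FLT_ROUNDS value for round toward zero.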
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
@@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
@@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
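
For reference, the canonical v4i32 masks these predicates accept, with WhichResult selecting the first or second result register (a hedged summary; -1 UNDEF indices are tolerated wherever the relaxed checks above permit):

    int VTRN0[4] = {0, 4, 2, 6}; // vtrn.32, first result
    int VTRN1[4] = {1, 5, 3, 7}; // vtrn.32, second result
    int VUZP0[4] = {0, 2, 4, 6}; // vuzp.32, first result
    int VZIP0[4] = {0, 4, 1, 5}; // vzip.32, first result
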
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
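
The ARM-mode test above relies on ARM_AM::getSOImmVal; as a reminder of what it accepts, a single-MOV constant is an 8-bit value rotated right by an even amount. A standalone sketch of that rule (not the LLVM helper itself):

    #include <cstdint>

    // True if some even left-rotation of V fits in 8 bits, i.e. V is an
    // 8-bit immediate rotated right by an even amount.
    static bool isSOImm(uint32_t V) {
      for (unsigned R = 0; R < 32; R += 2) {
        uint32_t Rot = (V << R) | (V >> ((32 - R) & 31)); // rotl(V, R)
        if (Rot <= 0xFF)
          return true;
      }
      return false;
    }
    // isSOImm(0xFF000000) and isSOImm(0x3FC) hold; isSOImm(0x101) does not.
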
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- // If all elements are constants, fall back to the default expansion, which
- // will generate a load from the constant pool.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ if (EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
if (isConstant)
return SDValue();
- // Use VDUP for non-constant splats.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (!EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ }
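
The f32-to-i32 reduction above is sound because a splat only replicates bits; the element interpretation is irrelevant. A minimal illustration (plain C++, hypothetical helper name):

    #include <cstdint>
    #include <cstring>

    // Splatting a float equals splatting its bit pattern as an i32 and
    // reinterpreting the vector, which is what the lowering above does.
    static void splatF32(float X, uint32_t Lanes[4]) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof Bits); // BIT_CONVERT f32 -> i32
      for (int i = 0; i < 4; ++i)
        Lanes[i] = Bits;                   // VDUP of the i32 value
    }
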
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
@@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
+/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
+/// an extending load, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLs;
+ } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLu;
+ } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else {
+ // Other vector multiplications are legal.
+ return Op;
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
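
The shape this lowering targets, in source terms (an illustrative example; whether vmull.s8 is actually emitted depends on the loop being vectorized):

    #include <cstdint>

    // Each product of two sign-extended i8 values fits in an i16, so the
    // vectorized body can use a widening multiply (ARMISD::VMULLs) instead
    // of an illegal or wider vector multiply.
    void widening_mul(int16_t *R, const int8_t *A, const int8_t *B, int N) {
      for (int i = 0; i < N; ++i)
        R[i] = (int16_t)(A[i] * B[i]);
    }
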
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
@@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
- case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
}
return SDValue();
}
@@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
- case ARM::tANDsp:
- case ARM::tADDspr_:
- case ARM::tSUBspi_:
- case ARM::t2SUBrSPi_:
- case ARM::t2SUBrSPi12_:
- case ARM::t2SUBrSPs_: {
- MachineFunction *MF = BB->getParent();
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool DstIsDead = MI->getOperand(0).isDead();
- bool SrcIsKill = MI->getOperand(1).isKill();
-
- if (SrcReg != ARM::SP) {
- // Copy the source to SP from virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- unsigned OpOpc = 0;
- bool NeedPred = false, NeedCC = false, NeedOp3 = false;
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected pseudo instruction!");
- case ARM::tANDsp:
- OpOpc = ARM::tAND;
- NeedPred = true;
- break;
- case ARM::tADDspr_:
- OpOpc = ARM::tADDspr;
- break;
- case ARM::tSUBspi_:
- OpOpc = ARM::tSUBspi;
- break;
- case ARM::t2SUBrSPi_:
- OpOpc = ARM::t2SUBrSPi;
- NeedPred = true; NeedCC = true;
- break;
- case ARM::t2SUBrSPi12_:
- OpOpc = ARM::t2SUBrSPi12;
- NeedPred = true;
- break;
- case ARM::t2SUBrSPs_:
- OpOpc = ARM::t2SUBrSPs;
- NeedPred = true; NeedCC = true; NeedOp3 = true;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
- if (OpOpc == ARM::tAND)
- AddDefaultT1CC(MIB);
- MIB.addReg(ARM::SP);
- MIB.addOperand(MI->getOperand(2));
- if (NeedOp3)
- MIB.addOperand(MI->getOperand(3));
- if (NeedPred)
- AddDefaultPred(MIB);
- if (NeedCC)
- AddDefaultCC(MIB);
-
- // Copy the result from SP to virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(ARM::SP);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
}
}
@@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
-/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
-static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI) {
    // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
-
return SDValue();
}
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
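
In source terms, the fold targets code like the following (illustrative; the select can then become a predicated add rather than a branch or conditional move):

    // x + (p ? 0 : c)  ==>  p ? x : (x + c), i.e. a conditional add.
    int add_if(int x, int c, bool p) {
      return x + (p ? 0 : c);
    }
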
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
    // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
@@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+  //      iff (val & ~mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+  //          && CountPopulation_32(~mask) == CountPopulation_32(mask2)
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask = C->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ if ((C = dyn_cast<ConstantSDNode>(N1))) {
+ unsigned Val = C->getZExtValue();
+ if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+ return SDValue();
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (N1.getOpcode() == ISD::AND) {
+    // Case (2): or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask2 = C->getZExtValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ ARM::isBitFieldInvertedMask(~Mask2) &&
+ (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned lsb = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ ARM::isBitFieldInvertedMask(Mask2) &&
+ (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+ }
+
+ return SDValue();
+}
+
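
A worked instance of case (1) above (hand arithmetic, not compiler output): with mask = 0xffff00ff, ~mask = 0x0000ff00 is the bits 8-15 hole, so lsb = 8.

    #include <cstdint>

    // (A & 0xffff00ff) | 0x2a00: here Val & ~Mask == Val and lsb(~Mask) == 8,
    // so the combine emits ARMbfi A, 0x2a, 0xffff00ff -- insert the 8-bit
    // value 0x2a into bits 8-15 of A (the constant is first moved to a reg).
    uint32_t set_field(uint32_t A) {
      return (A & 0xffff00ffu) | 0x2a00u;
    }
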
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
// If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
// a NaN; only do the transformation when it matches that behavior.
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+    return false;
+  // There can be 1's on either or both "outsides"; all the "inside"
+  // bits must be 0's.
+  unsigned int lsb = 0, msb = 31;
+  while (v & (1 << msb)) --msb;
+  while (v & (1 << lsb)) ++lsb;
+  for (unsigned int i = lsb; i <= msb; ++i) {
+    if (v & (1 << i))
+      return false;
+  }
+  return true;
+}
+
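
Equivalently (a hedged restatement using the GCC/Clang __builtin_ctz, not the in-tree code): v is a bitfield-inverted mask exactly when ~v is a single contiguous run of ones, so 0xffff00ff and 0xf000ffff qualify while 0xff00ff00 and 0xffffffff do not.

    #include <cstdint>

    static bool isBitFieldInvertedMaskAlt(uint32_t v) {
      uint32_t Hole = ~v;
      if (Hole == 0)
        return false;                  // v == 0xffffffff
      Hole >>= __builtin_ctz(Hole);    // drop the run's trailing zeros
      return (Hole & (Hole + 1)) == 0; // remaining bits must be 2^k - 1
    }
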
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e1e743..ba9ea7f15e7b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,6 +17,8 @@
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include <vector>
@@ -45,6 +47,8 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
+ AND, // ARM "and" instruction that sets the 's' flag in CPSR.
+
CMP, // ARM compare instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
@@ -80,7 +84,7 @@ namespace llvm {
MEMBARRIER, // Memory barrier
SYNCBARRIER, // Memory sync barrier
-
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -141,6 +145,10 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -150,7 +158,10 @@ namespace llvm {
// Floating-point max and min:
FMAX,
- FMIN
+ FMIN,
+
+ // Bit-field insert
+ BFI
};
}
@@ -162,6 +173,7 @@ namespace llvm {
/// returns -1.
int getVFPf32Imm(const APFloat &FPImm);
int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
}
//===--------------------------------------------------------------------===//
@@ -171,6 +183,8 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
+ virtual unsigned getJumpTableEncoding(void) const;
+
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -255,8 +269,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -265,11 +290,17 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -310,14 +341,15 @@ namespace llvm {
SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -377,6 +409,10 @@ namespace llvm {
unsigned BinOpcode) const;
};
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
}
#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index ac568e75ccc4..113cfffe61f9 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>;
def LdStExFrm : Format<11>;
def ArithMiscFrm : Format<12>;
-def ExtFrm : Format<13>;
-
-def VFPUnaryFrm : Format<14>;
-def VFPBinaryFrm : Format<15>;
-def VFPConv1Frm : Format<16>;
-def VFPConv2Frm : Format<17>;
-def VFPConv3Frm : Format<18>;
-def VFPConv4Frm : Format<19>;
-def VFPConv5Frm : Format<20>;
-def VFPLdStFrm : Format<21>;
-def VFPLdStMulFrm : Format<22>;
-def VFPMiscFrm : Format<23>;
-
-def ThumbFrm : Format<24>;
-def MiscFrm : Format<25>;
-
-def NGetLnFrm : Format<26>;
-def NSetLnFrm : Format<27>;
-def NDupFrm : Format<28>;
-def NLdStFrm : Format<29>;
-def N1RegModImmFrm: Format<30>;
-def N2RegFrm : Format<31>;
-def NVCVTFrm : Format<32>;
-def NVDupLnFrm : Format<33>;
-def N2RegVShLFrm : Format<34>;
-def N2RegVShRFrm : Format<35>;
-def N3RegFrm : Format<36>;
-def N3RegVShFrm : Format<37>;
-def NVExtFrm : Format<38>;
-def NVMulSLFrm : Format<39>;
-def NVTBLFrm : Format<40>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
// Misc flags.
@@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
class AddrMode<bits<4> val> {
bits<4> Value = val;
}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1 : AddrMode<1>;
-def AddrMode2 : AddrMode<2>;
-def AddrMode3 : AddrMode<3>;
-def AddrMode4 : AddrMode<4>;
-def AddrMode5 : AddrMode<5>;
-def AddrMode6 : AddrMode<6>;
-def AddrModeT1_1 : AddrMode<7>;
-def AddrModeT1_2 : AddrMode<8>;
-def AddrModeT1_4 : AddrMode<9>;
-def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<11>;
-def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
// Instruction size.
@@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
// ARM special operands.
//
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
@@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
@@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsARM];
}
-// Same as I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
@@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
}
class ABXIx2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin,
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
asm, "", pattern>;
// BR_JT instructions
@@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-
// Atomic load/store instructions
-
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 1;
+ let Inst{20} = 1;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 0;
+ let Inst{20} = 0;
let Inst{11-4} = 0b11111001;
}
@@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
: I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// loads
@@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -419,7 +425,7 @@ class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// stores
@@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed loads
@@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed stores
@@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed loads
@@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed stores
@@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -549,7 +555,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// addrmode3 instructions
@@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
- let Inst{12} = opcod3;
+ let Inst{12} = opcod3;
}
// BR_JT instructions
@@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 {
// A6.2.4 Load/store single data item encoding.
class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
- let Inst{11-9} = opB;
+ let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
+class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
-class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
@@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
- let Inst{22} = 1;
- let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{24} = P;
+ let Inst{23} = ?; // The U bit.
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = load;
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
- let Inst{24} = signed;
- let Inst{23} = 0;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
let Inst{22-21} = opcod;
- let Inst{20} = load;
- let Inst{11} = 1;
+ let Inst{20} = load;
+ let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
- let Inst{10} = pre; // The P bit.
- let Inst{8} = 1; // The W bit.
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
}
// Helper class for disassembly only
@@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b011;
- let Inst{23} = long;
+ let Inst{23} = long;
let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ let Inst{7-4} = op7_4;
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
// Load / store multiple
-class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let D = VFPNeonDomain;
}
-class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Double precision, binary, VML[AS] (for additional predicate)
@@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
-
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1010;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Single precision binary, if no NEON
@@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
: NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-24} = 0b11110100;
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{7-4} = op7_4;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
}
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
string opc, string dt, string asm, string cstr,
list<dag> pattern>
: NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-19} = op21_19;
- let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{5} = op5;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
}
// NEON 2 vector register format.
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 2 vector register with immediate.
@@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 3 vector register format.
@@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N3V except it doesn't have a data type suffix.
@@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON VMOVs between scalar and core registers.
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
"", itin> {
let Inst{27-20} = opcod1;
- let Inst{11-8} = opcod2;
- let Inst{6-5} = opcod3;
- let Inst{4} = 1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
let Inst{24-23} = 0b11;
let Inst{21-20} = 0b11;
let Inst{19-16} = op19_16;
- let Inst{11-7} = 0b11000;
- let Inst{6} = op6;
- let Inst{4} = 0;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc1522485f..e66f9b9ad0ac 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
SDTCisVT<3, i32>, SDTCisVT<4, i32>,
SDTCisVT<5, OtherVT>]>;
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
+def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
+ [SDNPOutFlag]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag,SDNPCommutative]>;
+ [SDNPOutFlag, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
-def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
+def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // there can be 1's on either or both "outsides", all the "inside"
- // bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
/// Split a 32-bit immediate into two 16 bit parts.
-def lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
}]>;
@@ -306,6 +302,13 @@ def pclabel : Operand<i32> {
let PrintMethod = "printPCLabel";
}
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
@@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm
// represented in the imm field in the same 12-bit form that they are encoded
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
-def so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
- }]> {
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
let PrintMethod = "printSOImmOperand";
}
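
The 12-bit form decodes per the standard A32 rule, with the rotation being twice the 4-bit field; a sketch (hypothetical helper name):

    #include <cstdint>

    // Bits 0-7: the 8-bit value. Bits 8-11: half the rotate-right amount.
    static uint32_t decodeSOImm(uint32_t Enc) {
      uint32_t Imm8 = Enc & 0xFF;
      uint32_t Rot  = ((Enc >> 8) & 0xF) * 2;
      return (Imm8 >> Rot) | (Imm8 << ((32 - Rot) & 31));
    }
    // decodeSOImm(0x1FF) == 0xC000003F (0xFF rotated right by 2).
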
@@ -452,11 +452,15 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{25} = 1;
}
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
@@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
@@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in {
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target \n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
@@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr\tpc, $target \n$jt",
+ IIC_Br, "ldr\tpc, $target$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add\tpc, $target, $idx \n$jt",
+ IIC_Br, "add\tpc, $target, $idx$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111,
defm UXTB16 : AI_unary_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 24)>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action instead,
+// so we can check that the upper eight bits of the source are masked back
+// into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
@@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
+ IIC_iALUi, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ IIC_iALUr, "rsb", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
+
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
+ IIC_iALUsr, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
let Inst{25} = 0;
}
@@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
Requires<[IsARM]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SBCrr with the operands swapped.
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
@@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */]>
: AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]> {
+ opc, "\t$dst, $a, $b", pattern> {
let Inst{27-20} = op27_20;
let Inst{7-4} = op7_4;
}
// Saturating add/subtract -- QADD and QSUB are selected via the int_arm_qadd
// and int_arm_qsub intrinsics; the remaining variants are for disassembly only
-def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD : AAI<0b00010000, 0b0101, "qadd",
+ [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
def QASX : AAI<0b01100010, 0b0011, "qasx">;
def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub",
+ [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
@@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
// Signed/Unsigned saturate -- for disassembly only
-def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b001;
-}
-
-def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101010;
let Inst{7-4} = 0b0011;
}
-def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b001;
-}
-
-def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101110;
let Inst{7-4} = 0b0011;
}
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ANDS : AI1_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
@@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $src, $imm", "",
- [/* For disassembly only; pattern left blank */]>,
+ "bfi", "\t$dst, $val, $imm", "$src = $dst",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
@@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
let Inst{19-16} = 0b1111;
}
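+
+// Map a plain shift amount onto the shifter-operand immediate encoding used
+// by the PKHBT/PKHTB patterns below: ARM_AM::getSORegOpc (ARMAddressingModes.h)
+// packs the shift type (LSL here; an ASR analogue follows) and the amount
+// into a single operand value.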
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+ (and (shl GPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b001;
@@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
(PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
+ (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra (the
+// 0xFFFF mask makes the sign bits dead, so the combiner canonicalizes sra
+// to srl) and will therefore match the srl pattern below.
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ (and (sra GPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b101;
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; that encoding is PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
+
+// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
+// and comparison with 0. These two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short,
+// CMP r0, r1 computes r0 + NOT(r1) + 1 (the "carry bit" parameter to
+// AddWithCarry is defined as 1 for CMP), so when r1 is 0 the NOT(0) + 1 term
+// wraps past 2^32 and the carry flag is always set. CMN r0, r1 performs no
+// NOT and uses a "carry bit" of 0, computing r0 + r1 + 0, so when r1 is 0
+// there is never a carry out.
+//
+// The AddWithCarry in the CMP case seems to be relying upon the identity:
+//
+// ~x + 1 = -x
+//
+// However when x is 0 and unsigned, this doesn't hold:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable *all* versions of CMN, especially when comparing
+// against zero, until we can limit when the CMN instruction is used (when we
+// know that the RHS is not 0) or when we have a hardware fix for this.
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
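+// A minimal sketch of the two comparisons in AddWithCarry terms (pseudo-code
+// paraphrased from the ARM ARM, not compiler code):
+//
+//   AddWithCarry(x, y, carry_in):
+//     unsigned_sum = UInt(x) + UInt(y) + UInt(carry_in)
+//     carry_out    = (unsigned_sum >= 2^32)
+//
+//   CMP r0, r1  ==>  AddWithCarry(r0, NOT(r1), 1)
+//   CMN r0, r1  ==>  AddWithCarry(r0, r1, 0)
+//
+// With r1 == 0, CMP adds r0 + 0xFFFFFFFF + 1 = r0 + 2^32, so carry_out is
+// always 1; CMN adds r0 + 0 + 0, so carry_out is always 0, and the unsigned
+// relational conditions (HS/LO/HI/LS) can see opposite answers.
+//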
//defm CMN : AI1_cmp_irs<0b1011, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
@@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
def BCCi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
- IIC_Br,
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
"${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
@@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// Memory barriers protect the atomic sequences.
let hasSideEffects = 1 in {
-def Int_MemBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_SyncBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierV6 GPR:$zero)]>,
+ [(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierV6 GPR:$zero)]>,
+ [(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding
}
}
-// Helper class for multiclass MemB -- for disassembly only
-class AMBI<string opc, string asm>
- : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
-}
-
-multiclass MemB<bits<4> op7_4, string opc> {
-
- def st : AMBI<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : AMBI<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : AMBI<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : AMBI<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : AMBI<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
+// Memory barrier operation variants -- for disassembly only
- def osh : AMBI<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+}
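+// The operand is the raw 4-bit barrier option from the encodings above
+// (0b1111 = sy, 0b1110 = st, 0b1011 = ish, ..., 0b0010 = oshst), which the
+// printMemBOption hook renders as the option mnemonic.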
- def oshst : AMBI<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+class AMBI<bits<4> op7_4, string opc>
+ : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-8} = 0xf57ff0;
+ let Inst{7-4} = op7_4;
}
// These DMB variants are for disassembly only.
-defm DMB : MemB<0b0101, "dmb">;
+def DMBvar : AMBI<0b0101, "dmb">;
// These DSB variants are for disassembly only.
-defm DSB : MemB<0b0100, "dsb">;
+def DSBvar : AMBI<0b0100, "dsb">;
// ISB has only full system option -- for disassembly only
-def ISBsy : AMBI<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7f7eb980abe8..4d2f1169061f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
@@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
@@ -124,15 +129,16 @@ def nModImm : Operand<i32> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
def VLDMQ
- : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+ "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
@@ -141,15 +147,16 @@ def VLD1q
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
def VSTMQ
- : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+ "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
@@ -160,6 +167,25 @@ def VST1q
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
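+// For example, a VLD2q8Pseudo defining a single QQPR (4 x D) value is
+// intended to be rewritten by the pseudo expansion pass (see
+// ARMExpandPseudoInsts.cpp) into a real VLD2q8 over the constituent
+// D registers.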
+class VLDQPseudo
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQWBPseudo
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQPseudo
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQQWBPseudo
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQQQWBPseudo
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb, $src = $dst">;
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
@@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">;
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8Pseudo : VLDQPseudo;
+def VLD1q16Pseudo : VLDQPseudo;
+def VLD1q32Pseudo : VLDQPseudo;
+def VLD1q64Pseudo : VLDQPseudo;
+
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
@@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+def VLD1q8Pseudo_UPD : VLDQWBPseudo;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d64TPseudo : VLDQQPseudo;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
@@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d64QPseudo : VLDQQPseudo;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
@@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2d8Pseudo : VLDQPseudo;
+def VLD2d16Pseudo : VLDQPseudo;
+def VLD2d32Pseudo : VLDQPseudo;
+
+def VLD2q8Pseudo : VLDQQPseudo;
+def VLD2q16Pseudo : VLDQQPseudo;
+def VLD2q32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
@@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
@@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo : VLDQQPseudo;
+def VLD3d16Pseudo : VLDQQPseudo;
+def VLD3d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
@@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo : VLDQQPseudo;
+def VLD4d16Pseudo : VLDQQPseudo;
+def VLD4d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
@@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
+class VSTQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
+class VSTQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
@@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo;
+def VST1q16Pseudo : VSTQPseudo;
+def VST1q32Pseudo : VSTQPseudo;
+def VST1q64Pseudo : VSTQPseudo;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo;
+def VST1q16Pseudo_UPD : VSTQWBPseudo;
+def VST1q32Pseudo_UPD : VSTQWBPseudo;
+def VST1q64Pseudo_UPD : VSTQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d64TPseudo : VSTQQPseudo;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
@@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d64QPseudo : VSTQQPseudo;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
@@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo;
+def VST2d16Pseudo : VSTQPseudo;
+def VST2d32Pseudo : VSTQPseudo;
+
+def VST2q8Pseudo : VSTQQPseudo;
+def VST2q16Pseudo : VSTQQPseudo;
+def VST2q32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo;
+def VST2d16Pseudo_UPD : VSTQWBPseudo;
+def VST2d32Pseudo_UPD : VSTQWBPseudo;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
@@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8Pseudo : VSTQQPseudo;
+def VST3d16Pseudo : VSTQQPseudo;
+def VST3d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
@@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8Pseudo : VSTQQPseudo;
+def VST4d16Pseudo : VSTQQPseudo;
+def VST4d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+ [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
-// Long 2-register intrinsics (currently only used for VMOVL).
-class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
@@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_8:$src3),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))))]>;
+
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let isCommutable = Commutable;
}
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
+ (TyD DPR:$src2))))))]> {
+ let isCommutable = Commutable;
+}
+
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
imm:$lane)))))]>;
-// Wide 3-register intrinsics.
-class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
let isCommutable = Commutable;
}
@@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
- def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
@@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v8i16, v8i8, IntOp, Commutable>;
}
+// ...with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
// Neon Wide 3-register vector operations,
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i64, v2i32, IntOp, Commutable>;
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
@@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
mul, ShOp>;
}
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
+// ...with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", int_arm_neon_vabds, 0>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", int_arm_neon_vabdu, 0>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
- "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
- "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
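
Both the signed and unsigned VABDL/VABAL forms extend with zext, which is
correct either way since an absolute difference is never negative; VABD is
also now marked commutable (|a - b| == |b - a|). A sketch of an
absolute-difference accumulation that can select vabal, assuming ACLE
intrinsics:

    #include <arm_neon.h>

    /* abs-diff, widen, accumulate; may be selected as vabal.u8. */
    uint16x8_t sad_acc(uint16x8_t acc, uint8x8_t a, uint8x8_t b) {
      return vaddq_u16(acc, vmovl_u8(vabd_u8(a, b)));
    }
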
// Vector Maximum and Minimum.
@@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
- "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
@@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
- int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
- int_arm_neon_vmovlu>;
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
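
With VMOVN/VMOVL matched from plain trunc/sext/zext, ordinary narrowing and
widening conversions reach them without intrinsics. A minimal sketch,
assuming ACLE intrinsics (which lower to exactly those nodes):

    #include <arm_neon.h>

    int16x4_t narrow(int32x4_t v) { return vmovn_s32(v); } /* trunc -> vmovn.i32 */
    int32x4_t widen(int16x4_t v)  { return vmovl_s16(v); } /* sext  -> vmovl.s16 */
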
// Vector Conversions.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bc0790dccbb5..a13ff1232749 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
// ADD rd, sp, #imm8
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in {
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $sp, $rhs", []>,
T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+}
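
Rematerialization lets the register allocator recompute a local's address
at each use instead of spilling it. A hypothetical C sketch (fill/consume
are stand-ins for arbitrary calls):

    void fill(int *p);
    void consume(int *p);

    int f(void) {
      int buf[8];
      fill(buf);       /* &buf is needed here...                    */
      consume(buf);    /* ...and here; under register pressure the  */
      return buf[0];   /* "add rN, sp, #imm" can simply be redone.  */
    }
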
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
@@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
let Inst{2-0} = 0b101;
}
-// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
-
-def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "${:comment} add\t$dst, $rhs", []>;
-
-let Defs = [CPSR] in
-def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "${:comment} and\t$dst, $rhs", []>;
-} // usesCustomInserter
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
@@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in {
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt",
+ IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
Encoding16 {
let Inst{15-7} = 0b010001101;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e81ab1..6ba0a44be470 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm
ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
[shl,srl,sra,rotr]> {
let PrintMethod = "printT2SOOperand";
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops rGPR, i32imm);
}
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
@@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// represented in the imm field in the same 12-bit form that they are encoded
// into t2_so_imm instructions: the 8-bit immediate is the least significant
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
-}]>;
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
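
For reference, a standalone sketch of the test this predicate performs (the
helper name is hypothetical; cf. ARM_AM::getT2SOImmVal): a Thumb-2 modified
immediate is one of four byte-splat forms, or an 8-bit value with its top
bit set rotated right by 8 to 31 bits.

    #include <stdbool.h>
    #include <stdint.h>

    static bool isT2SOImm(uint32_t v) {
      uint32_t b = v & 0xffu;          /* byte 0 */
      uint32_t c = (v >> 8) & 0xffu;   /* byte 1 */
      if ((v & 0xffffff00u) == 0) return true;  /* 0x000000XY */
      if (v == b * 0x00010001u) return true;    /* 0x00XY00XY */
      if (v == c * 0x01000100u) return true;    /* 0xXY00XY00 */
      if (v == b * 0x01010101u) return true;    /* 0xXYXYXYXY */
      /* 8-bit value with bit 7 set, rotated right by 8..31 bits */
      for (unsigned rot = 8; rot < 32; ++rot) {
        uint32_t u = (v << rot) | (v >> (32 - rot));  /* undo the ror */
        if ((u & ~0xffu) == 0 && (u & 0x80u)) return true;
      }
      return false;
    }
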
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
}
@@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>,
multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let Inst{31-27} = 0b11110;
@@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+ bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
/// T2I_rbin_irs - Same as T2I_bin_irs except the order of operands is
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -289,9 +298,9 @@ let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
+ }
// 12-bit imm
- def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
+ def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24} = 0;
@@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -382,9 +395,9 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -423,9 +436,9 @@ let Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit; the
+/// register version is not needed since this multiclass is only for codegen.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
!strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
@@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, GPR:$rhs)]> {
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+ [(set rGPR:$dst, (opnode rGPR:$src))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier; no
// pattern is supported yet.
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS,
+ (rotr rGPR:$RHS, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// DO variant - disassembly only, no pattern
multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// assembler.
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #$label", []> {
+def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr${p}.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{15} = 0;
}
} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #${label}_${id}", []> {
+ "adr${p}.w\t$dst, #${label}_${id}", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
}
// Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"sdiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"udiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
@@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
-def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
-def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
-} // usesCustomInserter
-
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
@@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
@@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
"mov", ".w\t$dst, $src",
- [(set GPR:$dst, t2_so_imm:$src)]> {
+ [(set rGPR:$dst, t2_so_imm:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]> {
+ [(set rGPR:$dst, imm0_65535:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
}
let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
"movt", "\t$dst, $imm",
- [(set GPR:$dst,
- (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ [(set rGPR:$dst,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>;
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
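
The 0xffff0000 pattern means an OR that sets the whole top halfword becomes
a single movt; together with t2MOVi16, an arbitrary 32-bit constant is
typically a movw/movt pair. Sketch:

    #include <stdint.h>

    uint32_t set_high(uint32_t x) {
      return x | 0xffff0000u;   /* expected: movt rN, #65535 */
    }
    uint32_t k(void) {
      return 0x12345678u;       /* expected: movw #0x5678 ; movt #0x1234 */
    }
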
//===----------------------------------------------------------------------===//
// Extend Instructions.
@@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
-def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead, so it can also check that the upper eight bits of the source
+// are masked back into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
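
The disabled shl form is wrong because a left shift feeds zeros into the
low byte, while uxtb16 with ror #24 wraps the source's top byte into it;
the two agree only when that byte is zero. The surviving srl form is safe,
since the bytes that differ between shift and rotate are exactly the ones
uxtb16 discards. Sketch:

    #include <stdint.h>

    uint32_t odd_bytes(uint32_t x) {
      return (x >> 8) & 0x00ff00ffu;   /* expected: uxtb16 rN, rM, ror #8 */
    }
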
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
-defm t2RSB : T2I_rbin_is <0b1110, "rsb",
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
@@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm),
- (t2SUBSri GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde GPR:$src, imm0_255_not:$imm),
- (t2SBCSri GPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
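
SelectionDAG canonicalizes subtraction of a constant into addition of its
negation; for a 64-bit value the low word then carries the negated
immediate (addc) and the high word the complemented one (adde), which these
patterns fold back into subs/sbcs. Sketch:

    #include <stdint.h>

    uint64_t sub5(uint64_t x) {
      /* expected (roughly): subs rLo, rLo, #5 ; sbcs rHi, rHi, #0 */
      return x - 5;
    }
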
// Select Bytes -- for disassembly only
@@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
-class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
- : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
- "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */]>
+ : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", pat> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
// Saturating add/subtract -- for disassembly only
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
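
With the patterns added to t2QADD/t2QSUB above, the saturating-arithmetic
intrinsics select those instructions directly. A sketch, assuming the
frontend exposes a builtin for int_arm_qadd (clang's __builtin_arm_qadd):

    int sat_add(int a, int b) {
      return __builtin_arm_qadd(a, b);   /* expected: qadd rN, rA, rB */
    }
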
@@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b),
NoItinerary, "usad8", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
"\t$dst, $a, $b, $acc", []>;
// Signed/Unsigned saturate -- for disassembly only
-def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
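
A sketch of what these two patterns enable, assuming the frontend exposes a
builtin for int_arm_ssat (clang's __builtin_arm_ssat); the zero third
operand is the "no shift" case of the new shift_imm operand:

    int clamp9(int x) {
      return __builtin_arm_ssat(x, 9);   /* expected: ssat rN, #9, rM */
    }
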
+
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
@@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"rrx", "\t$dst, $src",
- [(set GPR:$dst, (ARMrrx GPR:$src))]> {
+ [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"lsrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"asrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+
let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$dst, $imm",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> {
+ [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{15} = 0;
}
-def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
let Inst{15} = 0;
}
-def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs rGPR:$dst),
+ (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
+ bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
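
With the ARMbfi pattern in place, an insert-under-mask idiom can collapse
to a single bfi. Sketch:

    #include <stdint.h>

    uint32_t insert_field(uint32_t dst, uint32_t val) {
      /* may be selected as: bfi rDst, rVal, #4, #8 */
      return (dst & ~0x00000ff0u) | ((val << 4) & 0x00000ff0u);
    }
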
@@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+ (not node:$RHS))>, 0, "">;
// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
Requires<[IsThumb2]>;
def : T2Pat<(t2_so_imm_not:$src),
@@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"smull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
let Inst{7-4} = 0b0000;
}
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"umull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"smlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umaal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
// Rounding variants of the below are included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmulr", "\t$dst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlar", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlsr", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
}
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc,
+ (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
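
These multiclasses match explicit 16-bit sign-extensions and >>16 forms, so
the halfword multiplies are reachable from plain C. Sketch:

    #include <stdint.h>

    int32_t mul_bb(int32_t a, int32_t b) {
      return (int16_t)a * (int16_t)b;   /* expected: smulbb */
    }
    int32_t mul_tt(int32_t a, int32_t b) {
      return (a >> 16) * (b >> 16);     /* expected: smultt */
    }
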
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
"\t$ldst, $hdst, $a, $b", []>;
//===----------------------------------------------------------------------===//
@@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
let Inst{5-4} = op2;
}
-def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
-def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>;
+ [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
-def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
-def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rev16", ".w\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$dst,
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
-def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"revsh", ".w\t$dst, $src",
- [(set GPR:$dst,
+ [(set rGPR:$dst,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>;
+ (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
+ (shl rGPR:$src, (i32 8))), i16))]>;
-def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
+ (and (shl rGPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
@@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
}
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>,
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
+ (and (sra rGPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
Requires<[HasT2ExtractPack]>;
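For reference, the pack-halfword patterns above amount to the following C++ sketch (illustration only; shift amounts assumed to lie in the architecturally valid ranges):

#include <cstdint>

// PKHBT: bottom half of src1, top half of (src2 << lsl), lsl in 0..31.
static uint32_t pkhbt(uint32_t src1, uint32_t src2, unsigned lsl) {
  return (src1 & 0xFFFFu) | ((src2 << lsl) & 0xFFFF0000u);
}
// PKHTB: top half of src1, bottom half of (src2 >> asr), asr in 1..31 here.
static uint32_t pkhtb(uint32_t src1, int32_t src2, unsigned asr) {
  return (src1 & 0xFFFF0000u) | ((uint32_t)(src2 >> asr) & 0xFFFFu);
}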
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
@@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst",
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
-// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero.
-// Short range conditional branch. Looks awesome for loops. Need to figure
-// out how to use this one.
-
-
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
+def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
-def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
+def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod; // Shift type.
}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
} // neverHasSideEffects
@@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
-def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;
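For context: DMB orders memory accesses relative to each other, while DSB additionally stalls until outstanding accesses complete; both encodings above hard-wire the full-system (SY) option, hence the FIXMEs. A hedged usage sketch via GCC-style inline assembly (assumes an ARMv7 toolchain, where plain "dmb"/"dsb" assemble as the full-system variants):

// Full-system data memory barrier / data synchronization barrier.
static inline void dmb_sy() { asm volatile("dmb" ::: "memory"); }
static inline void dsb_sy() { asm volatile("dsb" ::: "memory"); }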
@@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
"", []>;
-def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary,
"ldrex", "\t$dest, [$ptr]", "",
[]> {
@@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"ldrexd", "\t$dest, $dest2, [$ptr]", "",
[], {?, ?, ?, ?}>;
}
let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strex", "\t$success, $src, [$ptr]", "",
[]> {
@@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
- (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
+ (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexd", "\t$success, $src, $src2, [$ptr]", "", [],
{?, ?, ?, ?}>;
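These exclusives are the building blocks of ARM atomic sequences, bracketed by the barriers above. A sketch of a compare-and-swap loop built from ldrex/strex (GCC-style inline asm, ARMv7 assumed; production code would use one fused asm block or compiler atomics so nothing clears the exclusive monitor between the two instructions):

#include <cstdint>

// Returns true iff *ptr held `expected` and was replaced by `desired`.
static bool cas32(volatile uint32_t *ptr, uint32_t expected, uint32_t desired) {
  uint32_t old, fail;
  do {
    asm volatile("ldrex %0, [%1]" : "=r"(old) : "r"(ptr) : "memory");
    if (old != expected) {
      asm volatile("clrex");                     // drop the exclusive monitor
      return false;
    }
    asm volatile("strex %0, %1, [%2]"            // fail == 0 on success
                 : "=&r"(fail) : "r"(desired), "r"(ptr) : "memory");
  } while (fail);
  return true;
}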
@@ -2416,7 +2414,7 @@ let Defs =
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2425,14 +2423,14 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2441,7 +2439,7 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT :
T2JTI<(outs),
(ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0100100;
@@ -2496,7 +2494,7 @@ def t2BR_JT :
def t2TBB :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb\t$index\n$jt", []> {
+ IIC_Br, "tbb\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2507,7 +2505,7 @@ def t2TBB :
def t2TBH :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh\t$index\n$jt", []> {
+ IIC_Br, "tbh\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
}
// Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000011; // W = 1
}
-def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base",
+def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000001; // W = 0
}
-def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011011; // W = 1
}
-def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011001; // W = 0
@@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
//
// Two piece so_imms.
-def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
(t2_so_neg_imm2part_2 imm:$RHS))>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
// when we can do generalized remat.
let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>;
+ [(set rGPR:$dst, (i32 imm:$src))]>;
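The ${src:lo16}/${src:hi16} operand modifiers split the constant: movw zero-extends the low 16 bits into the register, and movt then writes the high 16 bits. A sketch of the split (illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t imm = 0xDEADBEEF;
  std::printf("movw r0, #0x%04X\n", (unsigned)(imm & 0xFFFFu)); // lo16 = 0xBEEF
  std::printf("movt r0, #0x%04X\n", (unsigned)(imm >> 16));     // hi16 = 0xDEAD
}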
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
//
// Rd = Inst{11-8}
-def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
}
// Rd = Inst{11-8}
-def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
}
// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tcpsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
}
// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tspsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 84c23e1a784c..c29e09606bd4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
-def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
-def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
// For disassembly only.
-
+let Uses = [FPSCR] in {
def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
+}
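Unlike the Z-bit ("round toward zero") variants earlier in this file, vcvtr converts using whatever rounding mode FPSCR currently holds, which is why these defs now carry Uses = [FPSCR] and real patterns. A rough analogy in portable C++ using the standard floating-point environment (not ARM-specific; illustration only):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_UPWARD);       // analogous to setting FPSCR rounding bits
  long r = std::lrintf(2.25f);      // converts in the current mode, like vcvtr
  std::printf("%ld\n", r);          // 3 under FE_UPWARD, 2 under FE_TONEAREST
}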
// Convert between floating-point and fixed-point
// Data type for fixed-point naming convention:
@@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
+let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
@@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
// Fixed-Point to FP:
+let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
@@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
} // End of 'let Constraints = "$src = $dst" in'
@@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
}
// FPSCR <-> GPR (for disassembly only)
-
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\t$dst, fpscr",
- [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+ "vmrs", "\t$dst, fpscr",
+ [(set GPR:$dst, (int_arm_get_fpscr))]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
- "\tfpscr, $src",
- [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+ "vmsr", "\tfpscr, $src",
+ [(int_arm_set_fpscr GPR:$src)]> {
let Inst{27-20} = 0b11101110;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-} // neverHasSideEffects
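With patterns attached, int_arm_get_fpscr/int_arm_set_fpscr are now selectable instead of VMRS/VMSR being disassembly-only. A hedged usage sketch; the __builtin_arm_get_fpscr/__builtin_arm_set_fpscr spellings are assumed front-end names for these intrinsics, not something this patch adds:

// Clear the cumulative exception flags, FPSCR[4:0] (assumed builtins).
static void clear_fp_exceptions() {
  unsigned fpscr = __builtin_arm_get_fpscr();
  __builtin_arm_set_fpscr(fpscr & ~0x1Fu);
}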
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f80e316d23e8..2b7645a42119 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
- if (isAM4 && Offset == 4) {
- if (isThumb2)
- // Thumb2 does not support ldmib / stmib.
- return false;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
Mode = ARM_AM::ib;
- } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
- if (isThumb2)
- // Thumb2 does not support ldmda / stmda.
- return false;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
Mode = ARM_AM::da;
- } else if (isAM4 && Offset == -4 * (int)NumRegs) {
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
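Read as a table, the rewritten selection is: given the first merged access's offset from the base, pick IA, IB, DA, or DB, with IB/DA available only to ARM-mode integer ldm/stm. A condensed C++ sketch mirroring the logic above (names illustrative):

enum class SubMode { IA, IB, DA, DB, NeedNewBase };

// Offset is the first access's offset from the base register; NumRegs is
// how many registers are being merged. IB and DA exist only for ARM-mode
// integer ldm/stm, not for Thumb2 or for VFP load/store multiple.
static SubMode pickSubMode(int Offset, int NumRegs, bool isNotVFP,
                           bool isThumb2) {
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)                return SubMode::IB;
  if (Offset == -4 * NumRegs + 4 && haveIBAndDA) return SubMode::DA;
  if (Offset == -4 * NumRegs && isNotVFP)        return SubMode::DB;
  if (Offset != 0)                               return SubMode::NeedNewBase;
  return SubMode::IA;                            // default submode
}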
@@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true;  // New base is always killed right after its use.
}
- bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
- MachineInstrBuilder MIB = (isAM4)
- ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
- : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
- .addImm(Pred).addReg(PredReg);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
memOps[j].MBBI->getOperand(0).setIsKill(false);
+ memOps[j].isKill = false;
}
}
MBB.erase(memOps[i].MBBI);
@@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned insertAfter = SIndex;
@@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(Reg);
- // AM4 - register numbers in ascending order.
- // AM5 - consecutive register numbers in ascending order.
- // Can only do up to 16 double-word registers per insn.
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum)
+ ((isNotVFP && RegNum > PRegNum)
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
@@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
return false;
// Make sure the offset fits in 8 bits.
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
@@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
MI->getOpcode() != ARM::ADDri)
return false;
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
@@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
+ return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMD:
case ARM::VSTMD:
- return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ return (MI->getNumOperands() - 4) * 8;
}
}
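After this change the transfer size is uniform arithmetic: every load/store-multiple carries four fixed operands (base, mode immediate, predicate, predicate register) followed by one operand per transferred register, at 4 bytes each for GPRs and S registers and 8 for D registers. As a sketch:

// NumOperands counts the 4 fixed operands plus one per transferred register.
static unsigned transferSize(unsigned NumOperands, bool isDoubleReg) {
  return (NumOperands - 4) * (isDoubleReg ? 8u : 4u);
}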
@@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- unsigned Offset = 0;
- if (isAM4) {
- // Can't use an updating ld/st if the base register is also a dest
- // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).getReg() == Base)
- return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
- if (isAM4) {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::db;
- } else if (isAM4 && Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::da;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- }
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
@@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
- if (isAM4) {
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
if (DoMerge) {
if (NextMBBI == I) {
@@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(BaseKill));
- if (isAM4) {
- // [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
- .addImm(Pred).addReg(PredReg);
- } else {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
- .addImm(Pred).addReg(PredReg);
- }
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
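For example (illustrative): with this rewrite, "ldmia r0, {r1, r2, r3}" followed by "add r0, r0, #12" folds into the single updating form "ldmia r0!, {r1, r2, r3}", and "sub r0, r0, #12" followed by "ldmia r0, {r1, r2, r3}" becomes "ldmdb r0!, {r1, r2, r3}".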
@@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
+ ARM_AM::db : ARM_AM::ia);
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -748,6 +708,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5) {
// VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
MachineOperand &MO = MI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
@@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b60783..ab2b06b60783 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h
index b81a30690ce2..b81a30690ce2 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/ARMMCInstLower.h
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1ca5576..514c26b4daf0 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been spilled to
/// enable a far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index d020f3c74bde..305b232e6a99 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // FP is R11, R9 is available.
- static const unsigned ARM_GPR_AO_1[] = {
+ static const unsigned ARM_GPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
- // FP is R11, R9 is not available.
- static const unsigned ARM_GPR_AO_2[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R10,
- ARM::R11 };
- // FP is R7, R9 is available as non-callee-saved register.
- // This is used by Darwin.
- static const unsigned ARM_GPR_AO_3[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,ARM::R7 };
- // FP is R7, R9 is not available.
- static const unsigned ARM_GPR_AO_4[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,
- ARM::R7 };
- // FP is R7, R9 is available as callee-saved register.
- // This is used by non-Darwin platform in Thumb mode.
- static const unsigned ARM_GPR_AO_5[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
// For Thumb1 mode, we don't want to allocate hi regs at all, as we
// don't know how to spill them. If we make our prologue/epilogue code
@@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_4;
- else
- return ARM_GPR_AO_3;
- } else {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_2;
- else if (Subtarget.isThumb())
- return ARM_GPR_AO_5;
- else
- return ARM_GPR_AO_1;
- }
+ return ARM_GPR_AO;
}
GPRClass::iterator
GPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
- } else {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
- else if (Subtarget.isThumb())
- I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
- }
-
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
}
}];
}
-// Thumb registers are R0-R7 normally. Some instructions can still use
-// the general GPR register class above (MOV, e.g.)
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+// Restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviour when SP
+// or PC (R13 or R15) is used. The ARM ARM refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12, LR]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- static const unsigned THUMB_tGPR_AO[] = {
+ static const unsigned ARM_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_rGPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
- // FP is R7, only low registers available.
- tGPRClass::iterator
- tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return THUMB_tGPR_AO;
+ rGPRClass::iterator
+ rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO;
+ return ARM_rGPR_AO;
}
- tGPRClass::iterator
- tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ rGPRClass::iterator
+ rGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- tGPRClass::iterator I =
- THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
+ return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
}
}];
}
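The new class encodes the ARM ARM's BadReg() restriction in register-allocator vocabulary: rGPR is GPR minus SP and PC. As a one-line predicate (illustration; SP and PC carry the architectural numbers 13 and 15):

// BadReg(r) per the ARM ARM pseudo-code: true for SP (r13) and PC (r15).
static bool isBadReg(unsigned regNum) { return regNum == 13 || regNum == 15; }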
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (e.g., MOV).
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
@@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO_TC;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_NOR9_TC;
- else
- return ARM_GPR_R9_TC;
- } else
- // R9 is either callee-saved or reserved; can't use it.
- return ARM_GPR_NOR9_TC;
+ return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
}
tcGPRClass::iterator
tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
- return I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- else
- I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
- } else
- // R9 is either callee-saved or reserved; can't use it.
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- return I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+
+ return Subtarget.isTargetDarwin() ?
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
+ ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
}
}];
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 10fd257055fb..cb539f4c01ec 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowVMLx(false)
+ , SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
+ , NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , FPOnlySP(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e7d92ede9b98..67e58038ee77 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
+ V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -63,6 +63,9 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -84,6 +87,18 @@ protected:
/// instructions.
bool HasT2ExtractPack;
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// FPOnlySP - If true, the floating-point unit supports only single
+ /// precision.
+ bool FPOnlySP;
+
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -128,6 +143,8 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool hasARMOps() const { return !NoARM; }
+
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
bool hasNEON() const { return ARMFPUType >= NEON; }
@@ -135,8 +152,11 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 09203f9304df..30ff8276cdaa 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
TSInfo(*this) {
}
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+ return false;
+}
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));
@@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2())
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
PM.add(createARMConstantIslandPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a222e57b13ff..17e5425a9d37 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -50,6 +50,7 @@ public:
}
// Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4b083244b241..75e2a739bf1f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -18,8 +19,10 @@
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -37,6 +40,7 @@ enum ShiftType {
class ARMAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -76,26 +80,33 @@ private:
bool ParseDirectiveSyntax(SMLoc L);
- // TODO - For now hacked versions of the next two are in here in this file to
- // allow some parser testing until the table gen versions are implemented.
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ if (!MatchInstructionImpl(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+
+ return true;
+ }
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
- /// MatchRegisterName - Match the given string to a register name and return
- /// its register number, or -1 if there is no match. To allow return values
- /// to be used directly in register lists, arm registers have values between
- /// 0 and 15.
- int MatchRegisterName(StringRef Name);
+ unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
+
+ bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
+ &Operands,
+ MCInst &Inst);
/// }
public:
- ARMAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -110,16 +121,21 @@ private:
ARMOperand() {}
public:
enum KindTy {
- Token,
- Register,
+ CondCode,
Immediate,
- Memory
+ Memory,
+ Register,
+ Token
} Kind;
SMLoc StartLoc, EndLoc;
union {
struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
@@ -151,16 +167,19 @@ public:
};
- ARMOperand(KindTy K, SMLoc S, SMLoc E)
- : Kind(K), StartLoc(S), EndLoc(E) {}
+ //ARMOperand(KindTy K, SMLoc S, SMLoc E)
+ // : Kind(K), StartLoc(S), EndLoc(E) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
case Token:
- Tok = o.Tok;
+ Tok = o.Tok;
break;
case Register:
Reg = o.Reg;
@@ -179,6 +198,11 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -194,15 +218,50 @@ public:
return Imm.Val;
}
- bool isToken() const {return Kind == Token; }
+ bool isCondCode() const { return Kind == CondCode; }
+
+ bool isImm() const { return Kind == Immediate; }
bool isReg() const { return Kind == Register; }
+ bool isToken() const {return Kind == Token; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ // FIXME: What belongs here?
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
+ SMLoc S) {
+ Op.reset(new ARMOperand);
+ Op->Kind = CondCode;
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ }
+
static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
SMLoc S) {
Op.reset(new ARMOperand);
@@ -262,6 +321,33 @@ public:
} // end anonymous namespace.
+void ARMOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << ARMCondCodeToString(getCondCode());
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Memory:
+ OS << "<memory>";
+ break;
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name a Reg operand is created, the token is eaten
/// and false is returned. Else true is returned and no token is eaten.
@@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
return false;
}
-/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(StringRef Name) {
- if (Name == "r0" || Name == "R0")
- return 0;
- else if (Name == "r1" || Name == "R1")
- return 1;
- else if (Name == "r2" || Name == "R2")
- return 2;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r4" || Name == "R4")
- return 4;
- else if (Name == "r5" || Name == "R5")
- return 5;
- else if (Name == "r6" || Name == "R6")
- return 6;
- else if (Name == "r7" || Name == "R7")
- return 7;
- else if (Name == "r8" || Name == "R8")
- return 8;
- else if (Name == "r9" || Name == "R9")
- return 9;
- else if (Name == "r10" || Name == "R10")
- return 10;
- else if (Name == "r11" || Name == "R11" || Name == "fp")
- return 11;
- else if (Name == "r12" || Name == "R12" || Name == "ip")
- return 12;
- else if (Name == "r13" || Name == "R13" || Name == "sp")
- return 13;
- else if (Name == "r14" || Name == "R14" || Name == "lr")
- return 14;
- else if (Name == "r15" || Name == "R15" || Name == "pc")
- return 15;
- return -1;
-}
-
-/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::
-MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst) {
- ARMOperand &Op0 = *(ARMOperand*)Operands[0];
- assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
- StringRef Mnemonic = Op0.getToken();
- if (Mnemonic == "add" ||
- Mnemonic == "stmfd" ||
- Mnemonic == "str" ||
- Mnemonic == "ldmfd" ||
- Mnemonic == "ldr" ||
- Mnemonic == "mov" ||
- Mnemonic == "sub" ||
- Mnemonic == "bl" ||
- Mnemonic == "push" ||
- Mnemonic == "blx" ||
- Mnemonic == "pop") {
- // Hard-coded to a valid instruction, till we have a real matcher.
- Inst = MCInst();
- Inst.setOpcode(ARM::MOVr);
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateReg(0));
- return false;
- }
-
- return true;
-}
-
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
@@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
OwningPtr<ARMOperand> Op;
- ARMOperand::CreateToken(Op, Name, NameLoc);
-
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Determine the predicate, if any.
+ //
+ // FIXME: We need a way to check whether a prefix supports predication,
+ // otherwise we will end up with an ambiguity for instructions that happen to
+ // end with a predicate name.
+ unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Head = Head.slice(0, Head.size() - 2);
+ } else
+ CC = ARMCC::AL;
+
+ ARMOperand::CreateToken(Op, Head, NameLoc);
Operands.push_back(Op.take());
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
+ Operands.push_back(Op.take());
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start, Next);
+ ARMOperand::CreateToken(Op, Head, NameLoc);
+ Operands.push_back(Op.take());
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
OwningPtr<ARMOperand> Op;
if (ParseOperand(Op)) return true;
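
The suffix handling above is also why the FIXME warns about ambiguity: any
mnemonic whose last two characters happen to spell a condition code would be
split the same way. A standalone sketch of the splitting rule, using only the
standard library (an illustration of the idea, not the parser's API):

  #include <cstdio>
  #include <string>

  // Return the 4-bit ARM condition encoding for a two-letter suffix,
  // or -1 if the suffix is not a condition code.
  static int condCodeFromSuffix(const std::string &S) {
    static const char *const Names[] = { "eq","ne","hs","lo","mi","pl","vs",
                                         "vc","hi","ls","ge","lt","gt","le",
                                         "al" };
    for (int i = 0; i != 15; ++i)
      if (S == Names[i])
        return i;
    return -1;
  }

  int main() {
    std::string Mnemonic = "moveq";
    int CC = 14;                       // default: AL (always)
    if (Mnemonic.size() > 2) {
      int Suffix = condCodeFromSuffix(Mnemonic.substr(Mnemonic.size() - 2));
      if (Suffix != -1) {
        CC = Suffix;
        Mnemonic.erase(Mnemonic.size() - 2);
      }
    }
    // Prints: head='mov' cc=0. Feeding "teq" would mis-split it into
    // 't' + EQ, which is exactly the ambiguity the FIXME is about.
    printf("head='%s' cc=%d\n", Mnemonic.c_str(), CC);
    return 0;
  }
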
@@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
+
+#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index edc934549b28..8026e7718ca9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
O << '\t' << "vpush";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
O << '\t' << "vpop";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << ' ';
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
assert(0 && "FIXME: Implement printAddrModePCOperand");
}
-void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = CountTrailingZeros_32(v);
@@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
O << '#' << lsb << ", #" << width;
}
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "{";
@@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index ddf5047793d2..e5ad0d07e9ba 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -57,6 +57,8 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index 4e299f86ecb6..18645c0864a3 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -1,8 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmPrinter
- ARMAsmPrinter.cpp
ARMInstPrinter.cpp
- ARMMCInstLower.cpp
)
add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0df34666b959..6b4dee5965d2 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums)
tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
tablegen(ARMGenCodeEmitter.inc -gen-emitter)
tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenFastISel.inc -gen-fast-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
add_llvm_target(ARMCodeGen
+ ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
+ ARMFastISel.cpp
+ ARMGlobalMerge.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCAsmInfo.cpp
+ ARMMCInstLower.cpp
ARMRegisterInfo.cpp
+ ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
@@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
- ARMSelectionDAGInfo.cpp
)
-target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
+target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4de697e8bf67..e22028985b46 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
/// ARMDecoderEmitter.cpp TableGen backend. It contains:
///
@@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
return ARM::BFI;
}
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+ // As a result, the decoder fails to decode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
@@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
// As a result, the decoder fails to decode SSAT properly.
if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
+ return ARM::SSAT;
// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
@@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
/// decodeInstruction(insn) is invoked on the original insn.
///
/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
if (IsThumb2) {
uint16_t op1 = slice(insn, 28, 27);
uint16_t op2 = slice(insn, 26, 20);
@@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
// passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
// halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
// the top half followed by the second halfword.
- uint32_t insn = 0;
+ unsigned insn = 0;
// Possible second halfword.
uint16_t insn1 = 0;
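
The pattern in the hunks above is always the same: when tblgen's decoder table
folds two encodings into one "super-instruction", the driver re-examines fixed
bit fields of the raw word to recover the intended opcode. A self-contained
sketch of the USAT check (bit positions follow the hunk; the word in main()
is a hand-assembled "usat r0, #12, r1" used purely for illustration):

  #include <cassert>
  #include <cstdint>

  // Extract the inclusive bit range [From, To], as the disassembler's
  // slice() helper does.
  static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
    assert(From < 32 && To < 32 && From >= To);
    return (Bits >> To) & ((1u << (From - To + 1)) - 1);
  }

  // Inst{27-21} == 0b0110111 with Inst{5-4} == 0b01 identifies USAT inside
  // the encoding space the generated table assigns to STRBT.
  static bool looksLikeUSAT(uint32_t insn) {
    return slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1;
  }

  int main() {
    // 0xE6EC0011: cond=AL, sat_imm=12, Rd=r0, Rn=r1 -> usat r0, #12, r1
    return looksLikeUSAT(0xE6EC0011) ? 0 : 1;
  }
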
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff2832aa7..9f493b9aee02 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
/// describing the operand info for each ARMInsts[i].
@@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
RegClassID = ARM::DPRRegClassID;
}
+ // For this purpose, we can treat rGPR as if it were GPR.
+ if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+
// See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
unsigned RegNum =
RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
@@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
//
// A8-11: DecodeImmShift()
static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- // If type == 0b11 and imm5 == 0, we have an rrx, instead.
- if (ShOp == ARM_AM::ror && ShImm == 0)
- ShOp = ARM_AM::rrx;
- // If (lsr or asr) and imm5 == 0, shift amount is 32.
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0)
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
}
// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
@@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
- return true;
-
assert(0 && "Unexpected pseudo instruction!");
return false;
}
@@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- assert(0 && "Unexpected BrMiscFrm Opcode");
return false;
}
@@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
return true;
}
-static inline bool SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16:
- case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16:
- return true;
- default:
- return false;
- }
-}
-
-static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::SSATlsl:
- case ARM::SSATasr:
- return slice(insn, 20, 16) + 1;
- case ARM::SSAT16:
- return slice(insn, 19, 16) + 1;
- case ARM::USATlsl:
- case ARM::USATasr:
- return slice(insn, 20, 16);
- case ARM::USAT16:
- return slice(insn, 19, 16);
- default:
- assert(0 && "Invalid opcode passed in");
- return 0;
- }
-}
-
// A major complication is the fact that some of the saturating add/subtract
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
@@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpIdx >= NumOps)
return false;
- // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd.
- if (SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rm) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
-
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opcode == ARM::SSATasr && ShAmt == 0)
- ShAmt = 32;
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
- MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
+ ++OpIdx;
+ }
uint32_t mask = 0;
if (!getBFCInvMask(insn, mask))
return false;
@@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHTB)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
++OpIdx;
}
return true;
}
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
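
The asymmetric "+ 1" above comes from the encodings: SSAT saturates to a width
of 1..32 and stores width-1 in Inst{20-16}, while USAT saturates to 0..31 and
stores the width directly. The same extraction in standalone form:

  #include <cstdint>

  static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
    return (Bits >> To) & ((1u << (From - To + 1)) - 1);
  }

  // Saturation-width operand: signed saturates encode width-1,
  // unsigned saturates encode the width itself.
  static unsigned satPos(uint32_t insn, bool isSigned) {
    unsigned Pos = slice(insn, 20, 16);   // Inst{20-16} = sat_imm
    return isSigned ? Pos + 1 : Pos;
  }

  int main() {
    // sat_imm = 0b01100: width 13 read as SSAT, width 12 read as USAT.
    uint32_t insn = 12u << 16;
    return (satPos(insn, true) == 13 && satPos(insn, false) == 12) ? 0 : 1;
  }
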
// Extend instructions.
// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
// The 2nd operand register is Rn and the 3rd operand register is Rm for the
@@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd for operand 0.
@@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VFP Load/Store Multiple Instructions.
// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is
+// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
// followed by a reglist of either DPR(s) or SPR(s).
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
@@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
- // Next comes the AM5 Opcode.
+ // Next comes the AM4 Opcode.
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
// Must be either "ia" or "db" submode.
if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
- DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+ DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
return false;
}
-
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
// Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift;
@@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx += 4;
bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
+ Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd.
unsigned RegD = decodeVFPRd(insn, isSPVFP);
// Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
for (unsigned i = 0; i < Regs; ++i) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
@@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have homogeneous NEON registers for Load/Store.
unsigned RegClass = 0;
+ bool DRegPair = UseDRegPair(Opcode);
// Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
+ unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
unsigned Rn = decodeRn(insn);
unsigned Rm = decodeRm(insn);
@@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
return true;
}
@@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
break;
case ARM::VMOVv4i16:
case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
esize = ESize16;
break;
case ARM::VMOVv2i32:
case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
esize = ESize32;
break;
case ARM::VMOVv1i64:
@@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.49 ISB
static inline bool MemBarrierInstr(uint32_t insn) {
unsigned op7_4 = slice(insn, 7, 4);
- if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6))
+ if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
return true;
return false;
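
With the wider slice, the predicate now matches only the true barrier space:
DSB/DMB/ISB share the fixed prefix 0xF57FF0, differ in Inst{7-4} (4, 5, 6),
and carry in Inst{3-0} the option that the next hunk turns into an operand.
A standalone classifier built on the same comparisons:

  #include <cstdint>
  #include <cstdio>

  static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
    return (Bits >> To) & ((1u << (From - To + 1)) - 1);
  }

  // Returns the barrier mnemonic and its option field, or 0 if the word
  // is not a DSB/DMB/ISB encoding.
  static const char *classifyBarrier(uint32_t insn, unsigned &Option) {
    if (slice(insn, 31, 8) != 0xF57FF0)
      return 0;
    Option = slice(insn, 3, 0);          // Inst{3-0}: sy, st, ish, ...
    switch (slice(insn, 7, 4)) {
    case 4: return "dsb";
    case 5: return "dmb";
    case 6: return "isb";
    default: return 0;
    }
  }

  int main() {
    unsigned Option = 0;
    const char *Name = classifyBarrier(0xF57FF05F, Option);
    if (Name)
      printf("%s option=%#x\n", Name, Option);   // dmb option=0xf (sy)
    return 0;
  }
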
@@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (MemBarrierInstr(insn))
+ if (MemBarrierInstr(insn)) {
+ // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
+ // of within the generic ARMBasicMCBuilder::BuildIt() method.
+ //
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
return true;
+ }
switch (Opcode) {
case ARM::CLREX:
@@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleLdStMulFrm,
&DisassembleLdStExFrm,
&DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
&DisassembleExtFrm,
&DisassembleVFPUnaryFrm,
&DisassembleVFPBinaryFrm,
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 7d21256a14f9..9c30d332d1f2 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -23,7 +23,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
@@ -53,36 +54,35 @@ public:
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_EXTFRM, 13) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
- ENTRY(ARM_FORMAT_THUMBFRM, 24) \
- ENTRY(ARM_FORMAT_NEONFRM, 25) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
- ENTRY(ARM_FORMAT_MISCFRM, 29) \
- ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
- ENTRY(ARM_FORMAT_NLdSt, 31) \
- ENTRY(ARM_FORMAT_N1RegModImm, 32) \
- ENTRY(ARM_FORMAT_N2Reg, 33) \
- ENTRY(ARM_FORMAT_NVCVT, 34) \
- ENTRY(ARM_FORMAT_NVecDupLn, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
- ENTRY(ARM_FORMAT_N3Reg, 38) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
- ENTRY(ARM_FORMAT_NVecExtract, 40) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
- ENTRY(ARM_FORMAT_NVTBL, 42)
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
@@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
}
/// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
- uint32_t Val) {
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
assert(From < 32 && To < 32 && From >= To);
uint32_t Mask = ((1 << (From - To + 1)) - 1);
Bits &= ~(Mask << To);
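
slice() and setSlice() are the workhorses of both disassembler cores, so a
round-trip demo is useful. Note the hunk cuts off before setSlice()'s final
line; the sketch below supplies the obvious or-in as an assumed completion:

  #include <cassert>
  #include <cstdint>

  // Read the inclusive bit range [From, To] of a 32-bit word.
  static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
    assert(From < 32 && To < 32 && From >= To);
    return (Bits >> To) & ((1u << (From - To + 1)) - 1);
  }

  // Overwrite the inclusive bit range [From, To] with Val.
  // (A full 32-bit range would need extra care: 1u << 32 is undefined.)
  static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
                              unsigned Val) {
    assert(From < 32 && To < 32 && From >= To);
    unsigned Mask = (1u << (From - To + 1)) - 1;
    Bits &= ~(Mask << To);
    Bits |= (Val & Mask) << To;       // assumed completion, not in the hunk
  }

  int main() {
    unsigned insn = 0xFFFFFFF0;
    setSlice(insn, 11, 7, 13);        // plant imm5 = 13 at Inst{11-7}
    assert(slice(insn, 11, 7) == 13); // round-trips
    return 0;
  }
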
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf6fdb9..112817b13cf9 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) {
}
static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID;
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
}
// Utilities for 32-bit Thumb instructions.
@@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
switch (bits2) {
default: assert(0 && "No such value");
case 0:
- ShOp = ARM_AM::lsl;
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
return imm5;
case 1:
ShOp = ARM_AM::lsr;
@@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass < 0
&& OpInfo[3].RegClass < 0
- && "Exactlt 4 operands expect and first two as reg operands");
+ && "Exactly 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
@@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first two as reg operands");
- bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, OpInfo[0].RegClass,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
} else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
decodeRn(insn))));
++OpIdx;
} else {
@@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRm(insn))));
++OpIdx;
@@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned imm5 = getShiftAmtBits(insn);
ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-
- // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
- // decode the shift amount from raw imm5 and bits2, but we DO NOT need
- // to encode the ShOp, as it's in the asm string already.
- if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB)
- MI.addOperand(MCOperand::CreateImm(ShAmt));
- else
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
}
++OpIdx;
}
@@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the modified immediate.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, RdRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
return false;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
switch (Opcode) {
- case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16:
- case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16:
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
return true;
default:
return false;
}
}
-static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::t2SSATlsl:
- case ARM::t2SSATasr:
- return slice(insn, 4, 0) + 1;
- case ARM::t2SSAT16:
- return slice(insn, 3, 0) + 1;
- case ARM::t2USATlsl:
- case ARM::t2USATasr:
- return slice(insn, 4, 0);
- case ARM::t2USAT16:
- return slice(insn, 3, 0);
- default:
- assert(0 && "Unexpected opcode");
- return 0;
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
}
+ return true;
}
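
The only real difference from the ARM-mode saturate decoding is where the five
shift bits live: Thumb2 splits them into imm3 (Inst{14-12}) and imm2
(Inst{7-6}), concatenated as imm3:imm2. The concatenation in isolation:

  #include <cassert>
  #include <cstdint>

  static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
    return (Bits >> To) & ((1u << (From - To + 1)) - 1);
  }

  // imm5 = Inst{14-12} : Inst{7-6}
  static unsigned t2ShiftAmount(uint32_t insn) {
    return (slice(insn, 14, 12) << 2) | slice(insn, 7, 6);
  }

  int main() {
    // imm3 = 0b101, imm2 = 0b10 -> imm5 = 0b10110 = 22
    uint32_t insn = (5u << 12) | (2u << 6);
    assert(t2ShiftAmount(insn) == 22);
    return 0;
  }
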
// A6.3.3 Data-processing (plain binary immediate)
@@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
// o t2SBFX (SBFX): Rs Rn lsb width
// o t2UBFX (UBFX): Rs Rn lsb width
// o t2BFI (BFI): Rs Rn lsb width
-//
-// [Signed|Unsigned] Saturate [16]
-//
-// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
-// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
// Build the register operand(s), followed by the immediate(s).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
decodeRs(insn))));
++OpIdx;
- // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd.
- if (Thumb2SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rn) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
if (TwoReg) {
assert(NumOps >= 3 && "Expect >= 3 operands");
int Idx;
@@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MI.getOperand(Idx));
} else {
// Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
}
++OpIdx;
}
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC) {
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1575,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
return false;
} else {
// Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Unexpected opcode");
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- if (Opcode == ARM::t2BFI) {
- if (getMsb(insn) < getLsb(insn)) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
- } else
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
++OpIdx;
}
@@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// A8.6.26
// t2BXJ -> Rn
//
-// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants),
-// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX
// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
//
// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
"Expect >= 2 operands and first two as reg operands");
// Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
// Build the register operands.
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
NumOpsAdded = FourReg ? 4 : 3;
@@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
// Build the register operands.
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
@@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
case 2:
if (op == 0) {
- if (slice(op2, 5, 5) == 0) {
+ if (slice(op2, 5, 5) == 0)
// Data-processing (modified immediate)
return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
B);
- } else {
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- } else {
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
- break;
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
case 3:
switch (slice(op2, 6, 5)) {
case 0:
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 9e3ff29e07c4..b3fcfaf6bda7 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -14,10 +14,11 @@ TARGET = ARM
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
- ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenDecoderTables.inc ARMGenEDInfo.inc
+ ARMGenDecoderTables.inc ARMGenEDInfo.inc \
+ ARMGenFastISel.inc
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index bbdd3c7f7c3e..97e54bfaed9e 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted");
namespace {
struct NEONMoveFixPass : public MachineFunctionPass {
static char ID;
- NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index f67717cdd56f..3407ac6fe08e 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -23,7 +23,7 @@ namespace {
public:
static char ID;
- NEONPreAllocPass() : MachineFunctionPass(&ID) {}
+ NEONPreAllocPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
FirstOpnd = 0;
@@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD3d8:
- case ARM::VLD3d16:
- case ARM::VLD3d32:
- case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VLD3q8_UPD:
- case ARM::VLD3q16_UPD:
- case ARM::VLD3q32_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3q8odd_UPD:
- case ARM::VLD3q16odd_UPD:
- case ARM::VLD3q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD3LNq16:
case ARM::VLD3LNq32:
FirstOpnd = 0;
@@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD4d8:
- case ARM::VLD4d16:
- case ARM::VLD4d32:
- case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD4q8_UPD:
- case ARM::VLD4q16_UPD:
- case ARM::VLD4q32_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4q8odd_UPD:
- case ARM::VLD4q16odd_UPD:
- case ARM::VLD4q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
FirstOpnd = 0;
@@ -178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;
@@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST3d8:
- case ARM::VST3d16:
- case ARM::VST3d32:
- case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VST3q8_UPD:
- case ARM::VST3q16_UPD:
- case ARM::VST3q32_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3q8odd_UPD:
- case ARM::VST3q16odd_UPD:
- case ARM::VST3q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST3LNq16:
case ARM::VST3LNq32:
FirstOpnd = 2;
@@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST4d8:
- case ARM::VST4d16:
- case ARM::VST4d32:
- case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
@@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VST4q8_UPD:
- case ARM::VST4q16_UPD:
- case ARM::VST4q32_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4q8odd_UPD:
- case ARM::VST4q16odd_UPD:
- case ARM::VST4q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST4LNq16:
case ARM::VST4LNq32:
FirstOpnd = 2;
@@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
continue;
if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
- llvm_unreachable("expected a REG_SEQUENCE");
+
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ // For now, just assign a fixed set of adjacent registers.
+ // This leaves plenty of room for future improvements.
+ static const unsigned NEONDRegs[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7
+ };
+ MO.setReg(NEONDRegs[Offset + R * Stride]);
+
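+      // Tie the fixed physical register back to the virtual register with
+      // explicit copies; the coalescer should be able to remove most of them.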
+ if (MO.isUse()) {
+ // Insert a copy from VirtReg.
+        BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+                MO.getReg())
+          .addReg(VirtReg, getKillRegState(MO.isKill()));
+ MO.setIsKill();
+ } else if (MO.isDef() && !MO.isDead()) {
+ // Add a copy to VirtReg.
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
+ .addReg(MO.getReg());
+ }
+ }
}
return Modified;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0cb8ff01181d..9fc3fb92cb2c 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that.
//===---------------------------------------------------------------------===//
-Given the following on ARMv7:
-int test1(int A, int B) {
- return (A&-8388481)|(B&8388480);
-}
-
-We currently generate:
- bfc r0, #7, #16
- movw r2, #:lower16:8388480
- movt r2, #:upper16:8388480
- and r1, r1, r2
- orr r0, r1, r0
- bx lr
-
-The following is much shorter:
- lsr r1, r1, #7
- bfi r0, r1, #7, #16
- bx lr
-
-
-//===---------------------------------------------------------------------===//
-
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
int a(int x) { return __builtin_bswap32(x); }
@@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic
version.
//===---------------------------------------------------------------------===//
+
+Consider the following simple C code:
+
+void foo(unsigned char *a, unsigned char *b, int *c) {
+ if ((*a | *b) == 0) *c = 0;
+}
+
+Currently llvm-gcc generates something like this (nice branchless code, I'd say):
+
+ ldrb r0, [r0]
+ ldrb r1, [r1]
+ orr r0, r1, r0
+ tst r0, #255
+ moveq r0, #0
+ streq r0, [r2]
+ bx lr
+
+Note that both "tst" and "moveq" are redundant.
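+
+A shorter, untested sketch that avoids both:
+
+ ldrb r0, [r0]
+ ldrb r1, [r1]
+ orrs r0, r1, r0
+ streq r0, [r2]
+ bx lr
+
+Here "orrs" sets the flags itself (ldrb zero-extends, so "tst ..., #255"
+adds nothing), and r0 is already zero whenever the conditional store runs.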
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 39b70b43b23f..a21a3da10bda 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
MI.RemoveOperand(Op);
}
-int Thumb1RegisterInfo::
-rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
-{
- // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
- // version then can pull out Thumb1 specific parts here
- return 0;
-}
-
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
- // Thumb1 can't use the emergency spill slot on the stack because
- // ldr/str immediate offsets must be positive, and if we're referencing
- // off the frame pointer (if, for example, there are alloca() calls in
- // the function, the offset will be negative. Use R12 instead since that's
- // a call clobbered register that we know won't be used in Thumb1 mode.
- DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
-
- // The UseMI is where we would like to restore the register. If there's
- // interference with R12 before then, however, we'll need to restore it
- // before that instead and adjust the UseMI.
- bool done = false;
- for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
- if (II->isDebugValue())
- continue;
- // If this instruction affects R12, adjust our restore point.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = II->getOperand(i);
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- if (MO.getReg() == ARM::R12) {
- UseMI = II;
- done = true;
- break;
- }
- }
- }
- // Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
-
- return true;
-}
-
-unsigned
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
- unsigned VReg = 0;
- unsigned i = 0;
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
- // There are alloca()'s in this function, must reference off the frame
- // pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- // Special handling of dbg_value instructions.
- if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
- }
-
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
// Can't use tADDrSPi if it's based off the frame pointer.
unsigned NumBits = 0;
@@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(i+1);
- while (MI.getNumOperands() > i+1 &&
- (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm()));
- return 0;
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
}
// Common case: small offset, fits into instruction.
@@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
}
- return 0;
+ return true;
}
unsigned DestReg = MI.getOperand(0).getReg();
@@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
*this, dl);
MBB.erase(II);
- return 0;
+ return true;
}
if (Offset > 0) {
@@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = llvm::next(II);
@@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
if (Opcode == ARM::tADDi3) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
}
- return 0;
+ return true;
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
@@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Scale = 1;
switch (AddrMode) {
case ARMII::AddrModeT1_s: {
- ImmIdx = i+1;
+ ImmIdx = FrameRegIdx+1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = (FrameReg == ARM::SP) ? 8 : 5;
Scale = 4;
@@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Mask = (1 << NumBits) - 1;
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
- return 0;
+ return true;
}
bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset &= ~(Mask*Scale);
}
}
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
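+  // The caller is expected to have picked BaseReg so the whole offset is
+  // reachable; rewriteFrameIndex must therefore consume it completely.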
+  bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+  (void)Done;
+  assert(Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+  // Thumb1 can't use the emergency spill slot on the stack because
+  // ldr/str immediate offsets must be positive, and if we're referencing
+  // off the frame pointer (if, for example, there are alloca() calls in
+  // the function), the offset will be negative. Use R12 instead, since
+  // that's a call-clobbered register we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+ addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+  for (MachineBasicBlock::iterator II = I; !done && II != UseMI; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+    // There are alloca()s in this function; we must reference off the frame
+    // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
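+  // (With a base pointer, locals keep fixed offsets even as dynamic
+  // allocas move SP; emitPrologue copies SP into BasePtr for this.)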
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert(Offset && "This code isn't needed if offset already handled!");
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
@@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
- assert (Value && "Frame index virtual allocated, but Value arg is NULL!");
bool UseRR = false;
- bool TrackVReg = true;
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
if (Opcode == ARM::tSpill) {
if (FrameReg == ARM::SP)
@@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
UseRR = true;
- TrackVReg = false;
}
} else
emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
@@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
else // tSTR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
- if (!ReuseFrameIndexVals || !TrackVReg)
- VReg = 0;
} else
assert(false && "Unexpected opcode!");
@@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
- return VReg;
}
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+  // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
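+    // emitEpilogue keys off this flag rather than hasFP(), so SP is only
+    // rebuilt from FP when the prologue actually established it.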
}
// Determine starting offsets of spill areas.
@@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9a0308afa20c..c578054a5d71 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,27 +38,27 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
- // could not be handled directly in MI.
- int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc,
- unsigned SUBriOpc) const;
-
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
unsigned Reg) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index cd15bbed9f23..45e693744b80 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -27,7 +27,7 @@ namespace {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI,
}
}
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
bool
Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
SmallSet<unsigned, 4> &Defs,
SmallSet<unsigned, 4> &Uses) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
- "Sub-register indices still around?");
- // llvm models select's as two-address instructions. That means a copy
- // is inserted before a t2MOVccr, etc. If the copy is scheduled in
- // between selects we would end up creating multiple IT blocks.
-
- // First check if it's safe to move it.
- if (Uses.count(DstReg) || Defs.count(SrcReg))
- return false;
-
- // Then peek at the next instruction to see if it's predicated on CC or OCC.
- // If not, then there is nothing to be gained by moving the copy.
- MachineBasicBlock::iterator I = MI; ++I;
- MachineBasicBlock::iterator E = MI->getParent()->end();
- while (I != E && I->isDebugValue())
- ++I;
- if (I != E) {
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
- if (NCC == CC || NCC == OCC)
- return true;
- }
+ if (!isCopy(MI))
+ return false;
+  // LLVM models selects as two-address instructions. That means a copy
+  // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+  // between selects we would end up creating multiple IT blocks.
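+  // For example, two selects in a row may be scheduled as:
+  //   copy; t2MOVccr(eq); copy; t2MOVccr(eq)
+  // which needs two IT blocks. Hoisting the second copy above the first
+  // t2MOVccr leaves both predicated moves adjacent in one IT block.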
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
}
return false;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index ee517279c9d7..442f41da8a2d 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
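+    // rGPR (GPR excluding SP and PC) can be spilled the same way as the
+    // other GPR register classes.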
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ba392f36d946..0c3962dd123d 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -173,7 +173,7 @@ namespace {
char Thumb2SizeReduce::ID = 0;
}
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
return false;
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
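+    // e.g. "ldm r0, {r0, r1}": the 16-bit LDM always writes the base back
+    // unless the base is also loaded, so only that form matches the
+    // non-writeback wide encoding.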
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+ if (!isOK)
+ return false;
+
OpNum = 0;
isLdStMul = true;
break;