aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.h4
-rw-r--r--lib/Target/ARM/ARM.td20
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp165
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h5
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp665
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h18
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp220
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h28
-rw-r--r--lib/Target/ARM/ARMBuildAttrs.h2
-rw-r--r--lib/Target/ARM/ARMCallingConv.h21
-rw-r--r--lib/Target/ARM/ARMCallingConv.td38
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp25
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1323
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp8
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h2
-rw-r--r--lib/Target/ARM/ARMELFWriterInfo.cpp33
-rw-r--r--lib/Target/ARM/ARMELFWriterInfo.h1
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp581
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp1317
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp454
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--lib/Target/ARM/ARMGlobalMerge.cpp219
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp28
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp284
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1150
-rw-r--r--lib/Target/ARM/ARMISelLowering.h29
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td112
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp19
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h8
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td753
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td3103
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td127
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td405
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td494
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMJITInfo.h2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp214
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp10
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h19
-rw-r--r--lib/Target/ARM/ARMPerfectShuffle.h2
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h6
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td188
-rw-r--r--lib/Target/ARM/ARMRelocations.h2
-rw-r--r--lib/Target/ARM/ARMSchedule.td10
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td19
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td36
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td12
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp10
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp12
-rw-r--r--lib/Target/ARM/ARMSubtarget.h18
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp108
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h24
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp34
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmLexer.cpp9
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp3562
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/CMakeLists.txt46
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp1359
-rw-r--r--lib/Target/ARM/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp192
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h34
-rw-r--r--lib/Target/ARM/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/LLVMBuild.txt35
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp180
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h36
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp283
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h24
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h10
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp76
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp24
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp116
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt9
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp10
-rw-r--r--lib/Target/ARM/README.txt21
-rw-r--r--lib/Target/ARM/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/ARM/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp19
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp22
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h8
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp35
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp21
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp31
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h8
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.h3
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp141
100 files changed, 13950 insertions, 4973 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 16d0da3b8ac7..2a1e8e4d3079 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,4 +1,4 @@
-//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 5c727ad6e343..9b0cb0c9e575 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
- "Native client mode">;
-
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -35,6 +32,9 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -86,6 +86,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+// Some processors perform return stack prediction. CodeGen should avoid issue
+// "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Has return address stack">;
+
/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
"Supports v7 DSP instructions in Thumb2">;
@@ -201,13 +206,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9-mp", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP]>;
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index ea3319fb0e03..410790a7baa0 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,10 +13,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARM.h"
#include "ARMBuildAttrs.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
@@ -35,7 +34,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -44,10 +42,7 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,15 +80,15 @@ namespace {
void EmitTextAttribute(unsigned Attribute, StringRef String) {
switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
break;
/* GAS requires .fpu to be emitted regardless of EABI attribute */
case ARMBuildAttrs::Advanced_SIMD_arch:
case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
break;
- default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
void Finish() { }
@@ -197,15 +192,14 @@ namespace {
AttributeItemType item = Contents[i];
Streamer.EmitULEB128IntValue(item.Tag, 0);
switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
Streamer.EmitULEB128IntValue(item.IntValue, 0);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitBytes(item.StringValue.upper(), 0);
Streamer.EmitIntValue(0, 1); // '\0'
break;
- default:
- assert(0 && "Invalid attribute type");
}
}
@@ -300,6 +294,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ assert(Size && "C++ constructor pointer had zero size!");
+
+ const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+ assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+ const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ (Subtarget->isTargetDarwin()
+ ? MCSymbolRefExpr::VK_None
+ : MCSymbolRefExpr::VK_ARM_TARGET1),
+ OutContext);
+
+ OutStreamer.EmitValue(E, Size);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -316,8 +326,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned TF = MO.getTargetFlags();
switch (MO.getType()) {
- default:
- assert(0 && "<unknown operand type>");
+ default: llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
@@ -494,11 +503,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
@@ -576,10 +595,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
- if (Subtarget->isTargetELF()) {
-
+ if (Subtarget->isTargetELF())
emitAttributes();
- }
}
@@ -710,15 +727,26 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/vfpv3/vfpv2 for .fpu parameters */
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasVFP4())
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ "neon-vfpv4");
+ else
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
/* If emitted for NEON, omit from VFP below, since you can have both
* NEON and VFP in build attributes but only one .fpu */
emitFPU = false;
}
+ /* VFPv4 + .fpu */
+ if (Subtarget->hasVFP4()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
/* VFPv3 + .fpu */
- if (Subtarget->hasVFP3()) {
+ } else if (Subtarget->hasVFP3()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
ARMBuildAttrs::AllowFPv3A);
if (emitFPU)
@@ -740,14 +768,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -760,7 +788,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -808,7 +836,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
@@ -816,7 +843,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
}
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("Invalid ARMCPModifier!");
}
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
@@ -1070,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1084,7 +1111,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
@@ -1099,6 +1126,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
+ case ARM::t2STR_PRE:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
RegList.push_back(SrcReg);
@@ -1112,14 +1140,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
+ case ARM::tMOVr:
Offset = 0;
break;
case ARM::ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
+ case ARM::t2SUBri:
Offset = MI->getOperand(2).getImm();
break;
case ARM::tSUBspi:
@@ -1157,16 +1187,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
OutStreamer.EmitPad(Offset);
} else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
} else if (DstReg == ARM::SP) {
// FIXME: .movsp goes here
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
}
}
@@ -1195,7 +1225,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Check for manual lowerings.
unsigned Opc = MI->getOpcode();
switch (Opc) {
- case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
case ARM::DBG_VALUE: {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
SmallString<128> TmpStr;
@@ -1237,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
- case ARM::BXr9_CALL:
case ARM::BX_CALL: {
{
MCInst TmpInst;
@@ -1259,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::tBXr9_CALL:
case ARM::tBX_CALL: {
{
MCInst TmpInst;
@@ -1282,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::BMOVPCRXr9_CALL:
case ARM::BMOVPCRX_CALL: {
{
MCInst TmpInst;
@@ -1310,6 +1337,58 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
+ case ARM::BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::Bcc);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::t2BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2B);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
case ARM::MOVi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
@@ -1482,11 +1561,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1898,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.setOpcode(ARM::tLDRi);
TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
TmpInst.addOperand(MCOperand::CreateReg(0));
@@ -1935,4 +2013,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7741fc4b34e8..af3f75a0e892 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -73,6 +73,7 @@ public:
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ void EmitXXStructor(const Constant *CV);
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -106,7 +107,7 @@ public:
if (!Subtarget->isTargetDarwin())
return 0;
return Subtarget->isThumb() ?
- llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 408edfc20d4c..c6280f819a4f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -147,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -157,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned OffImm = MI->getOperand(NumOps-2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
switch (AddrMode) {
- default:
- assert(false && "Unknown indexed op!");
- return NULL;
+ default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
@@ -440,6 +437,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -490,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
- // FIXME: This confuses implicit_def with optional CPSR def.
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
- return false;
-
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+ (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
Found = true;
}
@@ -511,11 +520,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -544,83 +552,95 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
if (MCID.getSize())
return MCID.getSize();
- // If this machine instr is an inline asm, measure it.
- if (MI->getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
- if (MI->isLabel())
- return 0;
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
unsigned Opc = MI->getOpcode();
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::PROLOG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case ARM::MOVi16_ga_pcrel:
- case ARM::MOVTi16_ga_pcrel:
- case ARM::t2MOVi16_ga_pcrel:
- case ARM::t2MOVTi16_ga_pcrel:
- return 4;
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm:
- return 8;
- case ARM::CONSTPOOL_ENTRY:
- // If this machine instr is a constant pool entry, its size is recorded as
- // operand #2.
- return MI->getOperand(2).getImm();
- case ARM::Int_eh_sjlj_longjmp:
- return 16;
- case ARM::tInt_eh_sjlj_longjmp:
- return 10;
- case ARM::Int_eh_sjlj_setjmp:
- case ARM::Int_eh_sjlj_setjmp_nofp:
- return 20;
- case ARM::tInt_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 12;
- case ARM::BR_JTr:
- case ARM::BR_JTm:
- case ARM::BR_JTadd:
- case ARM::tBR_JTr:
- case ARM::t2BR_JT:
- case ARM::t2TBB_JT:
- case ARM::t2TBH_JT: {
- // These are jumptable branches, i.e. a branch followed by an inlined
- // jumptable. The size is 4 + 4 * number of entries. For TBB, each
- // entry is one byte; TBH two byte each.
- unsigned EntrySize = (Opc == ARM::t2TBB_JT)
- ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
- unsigned NumOps = MCID.getNumOperands();
- MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
- unsigned JTI = JTOP.getIndex();
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != 0);
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- assert(JTI < JT.size());
- // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
- // 4 aligned. The assembler / linker may add 2 byte padding just before
- // the JT entries. The size does not include this padding; the
- // constant islands pass does separate bookkeeping for it.
- // FIXME: If we know the size of the function is less than (1 << 16) *2
- // bytes, we can use 16-bit entries instead. Then there won't be an
- // alignment issue.
- unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = getNumJTEntries(JT, JTI);
- if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
- // Make sure the instruction that follows TBB is 2-byte aligned.
- // FIXME: Constant island pass should insert an "ALIGN" instruction
- // instead.
- ++NumEntries;
- return NumEntries * EntrySize + InstSize;
- }
- default:
- // Otherwise, pseudo-instruction sizes are zero.
- return 0;
- }
- return 0; // Not reached
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH two byte each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // 4 aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+}
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -660,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
- ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ // Handle register classes that require multiple instructions.
+ unsigned BeginIdx = 0;
+ unsigned SubRegs = 0;
+ unsigned Spacing = 1;
+
+ // Use VORRq when possible.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ // Fall back to VMOVD.
+ else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
+ else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
+ else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+
+ else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
+ else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
+ else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+
+ if (Opc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
- unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
- ARM::qsub_1 : ARM::qsub_3;
- for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, i);
- unsigned Src = TRI->getSubReg(SrcReg, i);
- MachineInstrBuilder Mov =
- AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
- .addReg(Dst, RegState::Define)
- .addReg(Src, getKillRegState(KillSrc))
- .addReg(Src, getKillRegState(KillSrc)));
- if (i == EndSubReg) {
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- }
+ MachineInstrBuilder Mov;
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+ Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src));
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
}
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
return;
}
+
llvm_unreachable("Impossible reg-to-reg copy");
}
@@ -710,8 +752,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
@@ -738,9 +779,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
@@ -825,7 +867,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VST1q64Pseudo:
+ case ARM::VST1q64:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -847,7 +889,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -862,7 +904,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
@@ -887,9 +929,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
@@ -911,11 +953,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -926,15 +969,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -971,7 +1015,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q64:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -993,7 +1037,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1359,7 +1403,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1380,7 +1424,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
- if (MI->definesRegister(ARM::SP))
+ // Calls don't actually change the stack pointer, even if they have imp-defs.
+ // No ARM calling conventions change the stack pointer. (X86 calling
+ // conventions sometimes do).
+ if (!MI->isCall() && MI->definesRegister(ARM::SP))
return true;
return false;
@@ -1445,15 +1492,37 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
int llvm::getMatchingCondBranchOpcode(int Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
- else if (Opc == ARM::tB)
+ if (Opc == ARM::tB)
return ARM::tBcc;
- else if (Opc == ARM::t2B)
- return ARM::t2Bcc;
+ if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
llvm_unreachable("Unknown unconditional branch opcode!");
- return 0;
}
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case ARM::MOVCCr:
+ case ARM::t2MOVCCr: {
+ // MOVCC can be commuted by inverting the condition.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+ // MOVCC AL can't be inverted. Shouldn't happen.
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return NULL;
+ MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ if (!MI)
+ return NULL;
+ // After swapping the MOVCC operands, also invert the condition.
+ MI->getOperand(MI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return MI;
+ }
+ }
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
@@ -1478,7 +1547,6 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::SUBSrsr, ARM::SUBrsr},
{ARM::RSBSri, ARM::RSBri},
- {ARM::RSBSrr, ARM::RSBrr},
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
@@ -1625,7 +1693,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += InstrOffs * Scale;
@@ -1765,8 +1832,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1777,6 +1843,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
+ return false;
if (!MO.isReg()) continue;
// This instruction modifies or uses CPSR after the one we want to
@@ -1838,6 +1906,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands();
!isSafe && IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
if (!MO.isReg() || MO.getReg() != ARM::CPSR)
continue;
if (MO.isDef()) {
@@ -1889,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
if (!MRI->hasOneNonDBGUse(Reg))
return false;
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (DefMCID.hasOptionalDef()) {
+ unsigned NumOps = DefMCID.getNumOperands();
+ const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ if (MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If DefMI defines CPSR and it is not dead, it's obviously not safe
+ // to delete DefMI.
+ return false;
+ }
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ if (UseMCID.hasOptionalDef()) {
+ unsigned NumOps = UseMCID.getNumOperands();
+ if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ // If the instruction sets the flag, do not attempt this optimization
+ // since it may change the semantics of the code.
+ return false;
+ }
+
unsigned UseOpc = UseMI->getOpcode();
unsigned NewUseOpc = 0;
uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
@@ -1960,7 +2051,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -1988,7 +2079,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
switch (Opc) {
default:
llvm_unreachable("Unexpected multi-uops instruction!");
- break;
case ARM::VLDMQIA:
case ARM::VSTMQIA:
return 2;
@@ -2335,6 +2425,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2343,35 +2486,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2396,28 +2581,38 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2425,7 +2620,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
- case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
case ARM::VLD3q8_UPD:
case ARM::VLD3q16_UPD:
case ARM::VLD3q32_UPD:
@@ -2436,22 +2632,29 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8_UPD:
case ARM::VLD4d16_UPD:
case ARM::VLD4d32_UPD:
- case ARM::VLD1d64Q_UPD:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
case ARM::VLD4q8_UPD:
case ARM::VLD4q16_UPD:
case ARM::VLD4q32_UPD:
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
- case ARM::VLD2DUPd8_UPD:
- case ARM::VLD2DUPd16_UPD:
- case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8:
case ARM::VLD4DUPd16:
case ARM::VLD4DUPd32:
@@ -2559,26 +2762,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (DefAlign < 8 && Subtarget.isCortexA9())
switch (DefMCID.getOpcode()) {
default: break;
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2586,7 +2799,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -2603,7 +2815,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -2613,18 +2824,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
@@ -2666,6 +2883,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2676,10 +2906,21 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
@@ -2828,3 +3069,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// This will go before any implicit ops.
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
+
+bool ARMBaseInstrInfo::hasNOP() const {
+ return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f32179a31..2fe85072a330 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,6 +35,9 @@ protected:
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
public:
+ // Return whether the target has an explicit NOP encoding.
+ bool hasNOP() const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -69,10 +72,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -139,6 +139,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+ MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
@@ -213,12 +215,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 154f1f8ff997..3907f7535260 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -61,41 +60,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
BasePtr(ARM::R6) {
}
-const unsigned*
+const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- static const unsigned CalleeSavedRegs[] = {
- ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
- ARM::R7, ARM::R6, ARM::R5, ARM::R4,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned DarwinCalleeSavedRegs[] = {
- // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
- // register.
- ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
- ARM::R11, ARM::R10, ARM::R8,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned GhcCalleeSavedRegs[] = {
- 0
- };
+ return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+}
- return ghcCall ? GhcCalleeSavedRegs :
- STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
BitVector ARMBaseRegisterInfo::
@@ -148,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return false;
}
-const TargetRegisterClass *
-ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned SubIdx) const {
- switch (SubIdx) {
- default: return 0;
- case ARM::ssub_0:
- case ARM::ssub_1:
- case ARM::ssub_2:
- case ARM::ssub_3: {
- // S sub-registers.
- if (A->getSize() == 8) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::DPR_8RegClass;
- assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
- if (A == &ARM::DPR_8RegClass)
- return A;
- return &ARM::DPR_VFP2RegClass;
- }
-
- if (A->getSize() == 16) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::QPR_8RegClass;
- return &ARM::QPR_VFP2RegClass;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::SPR_8RegClass)
- return 0; // Do not allow coalescing!
- return &ARM::QQPR_VFP2RegClass;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- return 0; // Do not allow coalescing!
- }
- case ARM::dsub_0:
- case ARM::dsub_1:
- case ARM::dsub_2:
- case ARM::dsub_3: {
- // D sub-registers.
- if (A->getSize() == 16) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B != &ARM::DPRRegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
- case ARM::dsub_4:
- case ARM::dsub_5:
- case ARM::dsub_6:
- case ARM::dsub_7: {
- // D sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::DPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
-
- case ARM::qsub_0:
- case ARM::qsub_1: {
- // Q sub-registers.
- if (A->getSize() == 32) {
- if (B == &ARM::QPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::QPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- case ARM::qsub_2:
- case ARM::qsub_3: {
- // Q sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- }
- return 0;
-}
-
bool
ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
@@ -416,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// getRawAllocationOrder - Returns the register allocation order for a
/// specified register class with a target-dependent hint.
-ArrayRef<unsigned>
+ArrayRef<uint16_t>
ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const {
@@ -425,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
// of register pairs.
// No FP, R9 is available.
- static const unsigned GPREven1[] = {
+ static const uint16_t GPREven1[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd1[] = {
+ static const uint16_t GPROdd1[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R7, R9 is available.
- static const unsigned GPREven2[] = {
+ static const uint16_t GPREven2[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd2[] = {
+ static const uint16_t GPROdd2[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R11, R9 is available.
- static const unsigned GPREven3[] = {
+ static const uint16_t GPREven3[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9
};
- static const unsigned GPROdd3[] = {
+ static const uint16_t GPROdd3[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
ARM::R8
};
// No FP, R9 is not available.
- static const unsigned GPREven4[] = {
+ static const uint16_t GPREven4[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd4[] = {
+ static const uint16_t GPROdd4[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R7, R9 is not available.
- static const unsigned GPREven5[] = {
+ static const uint16_t GPREven5[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd5[] = {
+ static const uint16_t GPROdd5[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R11, R9 is not available.
- static const unsigned GPREven6[] = {
+ static const uint16_t GPREven6[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
};
- static const unsigned GPROdd6[] = {
+ static const uint16_t GPROdd6[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
};
@@ -610,11 +484,15 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
if (!EnableBasePointer)
return false;
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ // When outgoing call frames are so large that we adjust the stack pointer
+ // around the call, we can no longer use the stack pointer to reach the
+ // emergency spill slot.
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -638,14 +516,29 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't realign the stack if:
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
- (!MFI->hasVarSizedObjects() || EnableBasePointer));
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ return true;
+ if (!EnableBasePointer)
+ return false;
+ // A base pointer is required and allowed. Check that it isn't too late to
+ // reserve it.
+ return MRI->canReserveReg(BasePtr);
}
bool ARMBaseRegisterInfo::
@@ -653,7 +546,7 @@ needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
@@ -662,7 +555,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -679,12 +572,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
@@ -892,7 +783,7 @@ int64_t ARMBaseRegisterInfo::
getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- int64_t InstrOffs = 0;;
+ int64_t InstrOffs = 0;
int Scale = 1;
unsigned ImmIdx = 0;
switch (AddrMode) {
@@ -933,7 +824,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
return InstrOffs * Scale;
@@ -1129,7 +1019,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
break;
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += getFrameIndexInstrOffset(MI, i);
@@ -1171,6 +1060,21 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(TFI->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index fee17ff3c1ca..af7935147e48 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,7 +35,7 @@ namespace ARMRI {
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R0: case R1: case R2: case R3:
@@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
case LR: case SP: case PC:
return true;
case R8: case R9: case R10: case R11:
- // For darwin we want r7 and lr to be next to each other.
- return !isDarwin;
+ // For iOS we want r7 and lr to be next to each other.
+ return !isIOS;
default:
return false;
}
}
-static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R8: case R9: case R10: case R11:
- // Darwin has this second area.
- return isDarwin;
+ // iOS has this second area.
+ return isIOS;
default:
return false;
}
}
-static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case D15: case D14: case D13: case D12:
@@ -94,17 +94,11 @@ protected:
public:
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// getMatchingSuperRegClass - Return a subclass of the specified register
- /// class A so that each register in it has a sub-register of the
- /// specified sub-register index which is in the specified register class B.
- virtual const TargetRegisterClass *
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const;
-
/// canCombineSubRegIndices - Given a register class and a list of
/// subregister indices, return true if it's possible to combine the
/// subregister indices into one that corresponds to a larger
@@ -125,7 +119,7 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index 69eddf03ec94..11bd6a4a8dbc 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -1,4 +1,4 @@
-//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index ff7db1ff62ed..0bd1c3ee2feb 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,4 +1,4 @@
-//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,12 @@
#ifndef ARMCALLINGCONV_H
#define ARMCALLINGCONV_H
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
@@ -29,7 +28,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +71,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -118,8 +117,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 47b2e9829834..d33364bb2871 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,7 +25,7 @@ def CC_ARM_APCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -43,6 +43,7 @@ def CC_ARM_APCS : CallingConv<[
]>;
def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
// Handle all vector types as either f64 or v2f64.
@@ -82,25 +83,6 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
-//===----------------------------------------------------------------------===//
-// ARM APCS Calling Convention for GHC
-//===----------------------------------------------------------------------===//
-
-def CC_ARM_APCS_GHC : CallingConv<[
- // Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
-
- CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
- CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
- CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
-
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
- CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
-]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -108,7 +90,7 @@ def CC_ARM_APCS_GHC : CallingConv<[
def CC_ARM_AAPCS_Common : CallingConv<[
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// i64/f64 is passed in even pairs of GPRs
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
@@ -125,6 +107,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
]>;
def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
]>;
@@ -181,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8))>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 4148d4ab10e9..32ef345c058f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
@@ -46,7 +46,7 @@ namespace {
class ARMCodeEmitter : public MachineFunctionPass {
ARMJITInfo *JTI;
- const ARMInstrInfo *II;
+ const ARMBaseInstrInfo *II;
const TargetData *TD;
const ARMSubtarget *Subtarget;
TargetMachine &TM;
@@ -66,7 +66,7 @@ namespace {
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
: MachineFunctionPass(ID), JTI(0),
- II((const ARMInstrInfo *)tm.getInstrInfo()),
+ II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
@@ -74,7 +74,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
bool runOnMachineFunction(MachineFunction &MF);
@@ -189,6 +189,8 @@ namespace {
unsigned Op) const { return 0; }
unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
@@ -366,9 +368,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
- II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
- TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
+ II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
+ TD = MF.getTarget().getTargetData();
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -386,7 +388,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -406,7 +408,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
@@ -532,7 +533,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
- break;
}
case ARMII::MiscFrm:
if (MI.getOpcode() == ARM::LEApcrelJT) {
@@ -541,7 +541,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
break;
}
llvm_unreachable("Unhandled instruction encoding!");
- break;
case ARMII::Pseudo:
emitPseudoInstruction(MI);
break;
@@ -837,9 +836,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL:
- case ARM::BXr9_CALL:
- case ARM::BMOVPCRXr9_CALL: {
+ case ARM::BMOVPCRX_CALL: {
// First emit mov lr, pc
unsigned Binary = 0x01a0e00f;
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a4134c704..fc35c7cb0200 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,16 +16,17 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,44 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +103,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -102,14 +195,24 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
+ private:
unsigned MaxDisp;
+ public:
bool NegOk;
bool IsSoImm;
+ bool KnownAlignment;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+ KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ /// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
+ unsigned getMaxDisp() const {
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
+ }
};
/// CPUsers - Keep track of all of the machine instructions that use various
@@ -162,10 +265,9 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
- const ARMInstrInfo *TII;
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -182,85 +284,100 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
- MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
- void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
- bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
- int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
- bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
- void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
- void RemoveDeadCPEMI(MachineInstr *CPEMI);
- bool RemoveUnusedCPEntries();
- bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
- MachineInstr *CPEMI, unsigned Disp, bool NegOk,
- bool DoDump = false);
- bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
- bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
- MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+ bool undoLRSpillRestore();
+ bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
+ bool optimizeThumb2Instructions();
+ bool optimizeThumb2Branches();
+ bool reorderThumb2JumpTables();
+ bool optimizeThumb2JumpTables();
+ MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- unsigned GetOffsetOf(MachineInstr *MI) const;
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
- unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
- unsigned CPEOffset = GetOffsetOf(U.CPEMI);
- unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
- UserOffset - CPEOffset;
- assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
}
#endif
}
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +386,41 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ scanFunctionJumpTables();
+ MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +428,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,34 +442,36 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ initializeFunctionInfo(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
/// Remove dead constant pool entries.
- MadeChange |= RemoveUnusedCPEntries();
+ MadeChange |= removeUnusedCPEntries();
// Iteratively place constant pool entries and fix up branches until there
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= handleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
- llvm_unreachable("Constant Island pass failed to converge!");
+ report_fatal_error("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
- llvm_unreachable("Branch Fix Up pass failed to converge!");
+ report_fatal_error("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
if (!CPChange && !BRChange)
@@ -358,15 +481,15 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= optimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
- MadeChange |= UndoLRSpillRestore();
+ MadeChange |= undoLRSpillRestore();
// Save the mapping between original and cloned constpool entries.
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
@@ -376,10 +499,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -390,39 +512,68 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// doInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
}
+ DEBUG(BB->dump());
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,41 +609,61 @@ ARMConstantIslands::CPEntry
return NULL;
}
-/// JumpTableFunctionScan - Do a scan of the function, building up
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::scanFunctionJumpTables() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
-/// InitialFunctionScan - Do the initial scan of the function, building up
+/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +672,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +686,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -589,7 +745,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
llvm_unreachable("Unknown addressing mode for CP reference!");
- break;
// Taking the address of a CP entry.
case ARM::LEApcrel:
@@ -647,45 +802,53 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(I))
+ BBI.Unalign = 1;
+ }
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
-/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
-unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- if (&*I == MI) return Offset;
Offset += TII->GetInstSizeInBytes(I);
}
+ return Offset;
}
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
@@ -695,19 +858,16 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
return LHS->getNumber() < RHS->getNumber();
}
-/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
-void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -721,15 +881,14 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
-MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,31 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
// Update internal data structures to account for the newly inserted MBB.
- // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // This is almost the same as updateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,86 +934,56 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ computeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ computeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ adjustBBOffsetsAfter(OrigBB);
return NewBB;
}
-/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// getUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+ const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+ unsigned KnownBits = BBI.internalKnownBits();
+
+ // The value read from PC is offset from the actual instruction address.
+ UserOffset += (isThumb ? 4 : 8);
+
+ // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+ // Make sure U.getMaxDisp() returns a constrained range.
+ U.KnownAlignment = (KnownBits >= 2);
+
+ // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // For unknown alignments, getMaxDisp() constrains the range instead.
+ if (isThumb && U.KnownAlignment)
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
-bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+/// UserOffset is computed by getUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
+bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
unsigned TrialOffset, unsigned MaxDisp,
bool NegativeOK, bool IsSoImm) {
- // On Thumb offsets==2 mod 4 are rounded down by the hardware for
- // purposes of the displacement computation; compensate for that here.
- // Effectively, the valid range of displacements is 2 bytes smaller for such
- // references.
- unsigned TotalAdj = 0;
- if (isThumb && UserOffset%4 !=0) {
- UserOffset -= 2;
- TotalAdj = 2;
- }
- // CPEs will be rounded up to a multiple of 4.
- if (isThumb && TrialOffset%4 != 0) {
- TrialOffset += 2;
- TotalAdj += 2;
- }
-
- // In Thumb2 mode, later branch adjustments can shift instructions up and
- // cause alignment change. In the worst case scenario this can cause the
- // user's effective address to be subtracted by 2 and the CPE's address to
- // be plus 2.
- if (isThumb2 && TotalAdj != 4)
- MaxDisp -= (4 - TotalAdj);
-
if (UserOffset <= TrialOffset) {
// User before the Trial.
if (TrialOffset - UserOffset <= MaxDisp)
@@ -880,40 +997,71 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
return false;
}
-/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
-bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // that the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
}
-/// CPEIsInRange - Returns true if the distance between specific MI and
+/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
-bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
- unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}
#ifndef NDEBUG
@@ -933,69 +1081,40 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
-/// DecrementOldEntry - find the constant pool entry with index CPI
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount. If the refcount
/// becomes 0 remove the entry and instruction. Returns true if we removed
/// the entry, false if we didn't.
-bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
// Find the old entry. Eliminate it if it is no longer used.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
- RemoveDeadCPEMI(CPEMI);
+ removeDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
--NumCPEs;
return true;
@@ -1009,14 +1128,15 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
-int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
{
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
// Check to see if the CPE is already in-range.
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1030,8 +1150,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Removing CPEs can leave empty entries, skip
if (CPEs[i].CPEMI == NULL)
continue;
- if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1045,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
CPEs[i].RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
- return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
}
}
return 0;
@@ -1066,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
return ((1<<23)-1)*4;
}
-/// LookForWater - Look for an existing entry in the WaterList in which
+/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
/// is set to the WaterList entry. For Thumb, prefer water that will not
@@ -1074,15 +1195,14 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
-bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
water_iterator &WaterIter) {
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,166 +1212,186 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
-/// CreateNewWater - No existing WaterList entry will work for
+/// createNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct. Otherwise the block is split to create a hole with an
/// unconditional branch around it. In either case NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
-void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned UserOffset,
MachineBasicBlock *&NewMBB) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
- // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
- // inside OffsetIsInRange.
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ // Thumb2, 2 on Thumb1.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift intertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ --MI;
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = splitBlockBeforeInstr(MI);
}
-/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned Size = CPEMI->getOperand(2).getImm();
- // Compute this only once, it's expensive. The 4 or 8 is the value the
- // hardware keeps in the PC.
- unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
// See if the current entry is within range, or there is a clone of it
// in range.
- int result = LookForExistingCPEntry(U, UserOffset);
+ int result = findInRangeCPEntry(U, UserOffset);
if (result==1) return false;
else if (result==2) return true;
@@ -1260,11 +1400,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
- if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,10 +1419,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
- CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
- // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
// called while handling branches so that the water will be seen on the
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
@@ -1304,13 +1444,13 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
- UpdateForInsertedWaterBlock(NewIsland);
+ updateForInsertedWaterBlock(NewIsland);
// Decrement the old entry, and remove it if refcount becomes 0.
- DecrementOldEntry(CPI, CPEMI);
+ decrementCPEReferenceCount(CPI, CPEMI);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
@@ -1320,13 +1460,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,31 +1474,30 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
-/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
/// sizes and offsets of impacted basic blocks.
-void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1367,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
// FIXME: remove the empty blocks after all the work is done?
}
-/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
/// are zero.
-bool ARMConstantIslands::RemoveUnusedCPEntries() {
+bool ARMConstantIslands::removeUnusedCPEntries() {
unsigned MadeChange = false;
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
std::vector<CPEntry> &CPEs = CPEntries[i];
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
- RemoveDeadCPEMI(CPEs[j].CPEMI);
+ removeDeadCPEMI(CPEs[j].CPEMI);
CPEs[j].CPEMI = NULL;
MadeChange = true;
}
@@ -1384,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() {
return MadeChange;
}
-/// BBIsInRange - Returns true if the distance between specific MI and
+/// isBBInRange - Returns true if the distance between specific MI and
/// specific BB can fit in MI's displacement field.
-bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
- unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
- << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
<< " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
if (BrOffset <= DestOffset) {
@@ -1409,50 +1547,50 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
return false;
}
-/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
// Check to see if the DestBB is already in-range.
- if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
}
-/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
/// too far away to fit in its displacement field. If the LR register has been
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
- llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+ llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
-/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1486,8 +1624,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// bne L2
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1498,19 +1636,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
}
if (NeedSplit) {
- SplitBlockBeforeInstr(MI);
+ splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,30 +1655,27 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ adjustBBOffsetsAfter(MBB);
return true;
}
-/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
/// LR / restores LR to pc. FIXME: This is done here because it's only possible
/// to do this if tBfar is not used.
-bool ARMConstantIslands::UndoLRSpillRestore() {
+bool ARMConstantIslands::undoLRSpillRestore() {
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
MachineInstr *MI = PushPopMIs[i];
@@ -1561,7 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+bool
+ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1592,25 +1744,31 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (!NewOpc)
continue;
- unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned UserOffset = getUserOffset(U);
unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+
+ // Be conservative with inline asm.
+ if (!U.KnownAlignment)
+ MaxOffs -= 2;
+
// FIXME: Check if offset is multiple of scale if scale is not 4.
- if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= optimizeThumb2Branches();
+ MadeChange |= optimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1636,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1650,9 +1809,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (Opcode != ARM::tBcc)
continue;
+ // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
+ // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR))
+ continue;
+
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
@@ -1662,27 +1826,28 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
- unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1694,14 +1859,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
-/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,18 +1874,18 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
bool ByteOk = true;
bool HalfWordOk = true;
- unsigned JTOffset = GetOffsetOf(MI) + 4;
+ unsigned JTOffset = getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1791,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
+ << " lea: " << *LeaMI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(MI->getOperand(JTOpIdx+1).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
// is 2-byte aligned. For now, asm printer will fix it up.
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
@@ -1808,8 +1976,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ adjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1819,12 +1987,12 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
-/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::reorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +2000,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1850,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- AdjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(MBB, MI->getParent());
if (NewBB)
MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
MadeChange = true;
@@ -1862,10 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
}
MachineBasicBlock *ARMConstantIslands::
-AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
-{
- MachineFunction &MF = *BB->getParent();
-
+adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +2047,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +2072,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index aadfd4779db3..fa3226e37eb9 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {}
const char *ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
case ARMCP::no_modifier: return "none";
@@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const {
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
+ llvm_unreachable("Unknown modifier!");
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- assert(false && "Shouldn't be calling this directly!");
- return -1;
+ llvm_unreachable("Shouldn't be calling this directly!");
}
void
@@ -315,5 +314,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
}
void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
ARMConstantPoolValue::print(O);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 0d0def32b7d8..6b98d446b003 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index 51e68b4553ff..f671317d0948 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
case ARM::reloc_arm_machine_cp_entry:
case ARM::reloc_arm_jt_base:
case ARM::reloc_arm_pic_jt:
- assert(0 && "unsupported ARM relocation type"); break;
-
- case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
- case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
- case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ llvm_unreachable("unsupported ARM relocation type");
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC;
default:
- llvm_unreachable("unknown ARM relocation type"); break;
+ llvm_unreachable("unknown ARM relocation type");
}
- return 0;
}
long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
- assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not "
+ "implemented");
}
unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented");
}
bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
- return 1;
+ llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented");
}
unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
index 1c4e5329ac61..6a84f8ac4235 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.h
+++ b/lib/Target/ARM/ARMELFWriterInfo.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetELFWriterInfo.h"
namespace llvm {
+ class TargetMachine;
class ARMELFWriterInfo : public TargetELFWriterInfo {
public:
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7872cb90f4e7..5fc0360528cc 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1,4 +1,4 @@
-//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,7 +19,6 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -61,7 +60,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -99,13 +98,20 @@ namespace {
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
- unsigned PseudoOpc;
- unsigned RealOpc;
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
bool IsLoad;
- bool HasWriteBack;
+ bool isUpdating;
+ bool hasWritebackOperand;
NEONRegSpacing RegSpacing;
unsigned char NumRegs; // D registers loaded or stored
unsigned char RegElts; // elements per D register; used for lane ops
+ // FIXME: Temporary flag to denote whether the real instruction takes
+ // a single register (like the encoding) or all of the registers in
+ // the list (like the asm syntax and the isel DAG). When all definitions
+ // are converted to take only the single encoded register, this will
+ // go away.
+ bool copyAllListRegs;
// Comparison methods for binary search of the table.
bool operator<(const NEONLdStTableEntry &TE) const {
@@ -122,243 +128,203 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
-{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
-{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
-{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
-{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
-
-{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
-{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
-{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
-{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
-{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
-
-{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
-{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
-
-{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
-{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
-{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
-
-{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
-
-{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
-{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
-{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
-
-{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
-{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
-
-{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
-{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
-{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
-
-{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
-{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
-{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
-
-{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
-{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
-
-{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
-{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
-{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
-{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
-{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
-
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
-{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
-{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
-
-{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
-
-{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
-{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
-{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
-
-{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
-{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
-{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
-
-{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
-{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
-{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
};
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- unsigned NumEntries = array_lengthof(NEONLdStTable);
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
@@ -422,21 +388,22 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 2)
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
@@ -481,24 +448,26 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
+ MIB.addReg(D0);
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1);
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2);
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3);
// Copy the predicate operands.
@@ -558,14 +527,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
}
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// Grab the super-register source.
@@ -599,13 +568,15 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -621,11 +592,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -645,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
@@ -809,7 +776,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup: {
+ case ARM::Int_eh_sjlj_dispatchsetup:
+ case ARM::Int_eh_sjlj_dispatchsetup_nofp:
+ case ARM::tInt_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
@@ -824,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
- llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
} else if (AFI->isThumbFunction()) {
- llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *TII, RI);
+ emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
} else {
- llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0,
- *TII);
+ emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
@@ -996,6 +965,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1026,6 +996,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1057,26 +1028,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1084,7 +1044,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -1101,7 +1060,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -1111,18 +1069,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
@@ -1138,26 +1084,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandVLD(MBBI);
return true;
- case ARM::VST1q8Pseudo:
- case ARM::VST1q16Pseudo:
- case ARM::VST1q32Pseudo:
- case ARM::VST1q64Pseudo:
- case ARM::VST1q8Pseudo_UPD:
- case ARM::VST1q16Pseudo_UPD:
- case ARM::VST1q32Pseudo_UPD:
- case ARM::VST1q64Pseudo_UPD:
- case ARM::VST2d8Pseudo:
- case ARM::VST2d16Pseudo:
- case ARM::VST2d32Pseudo:
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1165,7 +1100,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1182,7 +1118,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1270,15 +1207,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
-
- return false;
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index dc8e54ddf957..2e1eaca85b5b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,7 +16,6 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
@@ -37,7 +36,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -90,7 +88,7 @@ class ARMFastISel : public FastISel {
ARMFunctionInfo *AFI;
// Convenience variables to avoid some queries.
- bool isThumb;
+ bool isThumb2;
LLVMContext *Context;
public:
@@ -101,7 +99,7 @@ class ARMFastISel : public FastISel {
TLI(*TM.getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
- isThumb = AFI->isThumbFunction();
+ isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
}
@@ -148,6 +146,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
#include "ARMGenFastISel.inc"
@@ -156,27 +156,40 @@ class ARMFastISel : public FastISel {
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
- bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
- bool SelectSIToFP(const Instruction *I);
- bool SelectFPToSI(const Instruction *I);
- bool SelectSDiv(const Instruction *I);
- bool SelectSRem(const Instruction *I);
- bool SelectCall(const Instruction *I);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectIToFP(const Instruction *I, bool isSigned);
+ bool SelectFPToI(const Instruction *I, bool isSigned);
+ bool SelectDiv(const Instruction *I, bool isSigned);
+ bool SelectRem(const Instruction *I, bool isSigned);
+ bool SelectCall(const Instruction *I, const char *IntrMemName);
+ bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
- bool SelectIntCast(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
+
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT);
+ void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ bool ARMIsMemCpySmall(uint64_t Len);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
+ unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
@@ -186,8 +199,6 @@ class ARMFastISel : public FastISel {
// Call handling routines.
private:
- bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
- unsigned &ResultReg);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
@@ -208,7 +219,7 @@ class ARMFastISel : public FastISel {
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags);
+ unsigned Flags, bool useAM3);
};
} // end anonymous namespace
@@ -219,8 +230,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -290,10 +300,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -310,11 +320,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
@@ -333,12 +343,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addReg(Op2, Op2IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -357,11 +367,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
@@ -379,11 +389,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
@@ -402,12 +412,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -425,10 +435,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -444,10 +454,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm1).addImm(Imm2));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm1).addImm(Imm2));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -464,9 +474,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
+
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
return ResultReg;
}
@@ -477,7 +488,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVRS), MoveReg)
+ TII.get(ARM::VMOVSR), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
@@ -487,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVSR), MoveReg)
+ TII.get(ARM::VMOVRS), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
@@ -541,22 +552,42 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return false;
// If we can do this in a single instruction without a constant pool entry
// do so now.
const ConstantInt *CI = cast<ConstantInt>(C);
- if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
- unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), DestReg)
- .addImm(CI->getSExtValue()));
- return DestReg;
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
}
+ // Use MVN to emit negative constants.
+ if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+ unsigned Imm = (unsigned)~(CI->getSExtValue());
+ bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ if (UseImm) {
+ unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ImmReg)
+ .addImm(Imm));
+ return ImmReg;
+ }
+ }
+
+ // Load from constant pool. For now 32-bit only.
+ if (VT != MVT::i32)
+ return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
@@ -565,7 +596,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
- if (isThumb)
+ if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg)
.addConstantPoolIndex(Idx));
@@ -586,44 +617,69 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Reloc::Model RelocM = TM.getRelocationModel();
// TODO: Need more magic for ARM PIC.
- if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = TD.getPrefTypeAlignment(GV->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = TD.getTypeAllocSize(GV->getType());
- }
+ if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
- // Grab index.
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
- unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
- ARMCP::CPValue,
- PCAdj);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
-
- // Load value.
- MachineInstrBuilder MIB;
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb) {
- unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx);
- if (RelocM == Reloc::PIC_)
- MIB.addImm(Id);
+
+ // Use movw+movt when possible, it avoids constant pool entries.
+ // Darwin targets don't support movt with Reloc::Static, see
+ // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
+ // static movt relocations.
+ if (Subtarget->useMovt() &&
+ Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
+ unsigned Opc;
+ switch (RelocM) {
+ case Reloc::PIC_:
+ Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+ break;
+ case Reloc::DynamicNoPIC:
+ Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
+ break;
+ default:
+ Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ break;
+ }
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg).addGlobalAddress(GV));
} else {
- // The extra immediate is for addrmode2.
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
- DestReg)
- .addConstantPoolIndex(Idx)
- .addImm(0);
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+ (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ if (isThumb2) {
+ unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
}
- AddOptionalDefs(MIB);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb)
+ if (isThumb2)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
@@ -656,6 +712,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -669,10 +727,10 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// This will get lowered later into the correct offsets and registers
// via rewriteXFrameIndex.
if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -699,7 +757,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// If this is a type than can be sign or zero-extended to a basic operation
// go ahead and accept it now.
- if (VT == MVT::i8 || VT == MVT::i16)
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
@@ -813,35 +871,33 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
}
}
- // Materialize the global variable's address into a reg which can
- // then be used later to load the variable.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
- unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
- if (Tmp == 0) return false;
-
- Addr.Base.Reg = Tmp;
- return true;
- }
-
// Try to get this in a register if nothing else has worked.
if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
bool needsLowering = false;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Unhandled load/store type!");
+ default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- // Integer loads/stores handle 12-bit offsets.
- needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ if (!useAM3) {
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ // Handle negative offsets.
+ if (needsLowering && isThumb2)
+ needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+ Addr.Offset > -256);
+ } else {
+ // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
+ needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+ }
break;
case MVT::f32:
case MVT::f64:
@@ -854,11 +910,11 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -877,7 +933,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags) {
+ unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
@@ -897,60 +953,127 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
MIB.addMemOperand(MMO);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
}
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
-
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
- TargetRegisterClass *RC;
+ bool useAM3 = false;
+ bool needVMOV = false;
+ const TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
- case MVT::i16:
- Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ case MVT::i1:
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+ else
+ Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+ } else {
+ if (isZExt) {
+ Opc = ARM::LDRBi12;
+ } else {
+ Opc = ARM::LDRSB;
+ useAM3 = true;
+ }
+ }
RC = ARM::GPRRegisterClass;
break;
- case MVT::i8:
- Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ case MVT::i16:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+ else
+ Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+ } else {
+ Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+ useAM3 = true;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::i32:
- Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = ARM::t2LDRi8;
+ else
+ Opc = ARM::t2LDRi12;
+ } else {
+ Opc = ARM::LDRi12;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ if (allocReg)
+ ResultReg = createResultReg(RC);
+ assert (ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to an regular
+ // load. Now we must move from the GRP to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
@@ -969,51 +1092,92 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
+ bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
ARM::GPRRegisterClass);
- unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
} // Fallthrough here.
case MVT::i8:
- StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRBi8;
+ else
+ StrOpc = ARM::t2STRBi12;
+ } else {
+ StrOpc = ARM::STRBi12;
+ }
break;
case MVT::i16:
- StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRHi8;
+ else
+ StrOpc = ARM::t2STRHi12;
+ } else {
+ StrOpc = ARM::STRH;
+ useAM3 = true;
+ }
break;
case MVT::i32:
- StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRi8;
+ else
+ StrOpc = ARM::t2STRi12;
+ } else {
+ StrOpc = ARM::STRi12;
+ }
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg, getKillRegState(true));
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ .addReg(SrcReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
return true;
}
@@ -1039,7 +1203,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1099,30 +1264,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// If we can, avoid recomputing the compare - redoing it could lead to wonky
// behavior.
- // TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- MVT SourceVT;
- Type *Ty = CI->getOperand(0)->getType();
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())
- && isTypeLegal(Ty, SourceVT)) {
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
- return false;
-
- unsigned CmpOpc;
- switch (SourceVT.SimpleTy) {
- default: return false;
- // TODO: Verify compares.
- case MVT::f32:
- CmpOpc = ARM::VCMPES;
- break;
- case MVT::f64:
- CmpOpc = ARM::VCMPED;
- break;
- case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- break;
- }
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
@@ -1137,23 +1280,11 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// We may not handle every CC for now.
if (ARMPred == ARMCC::AL) return false;
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
-
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
-
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
-
- // For floating point we need to move the result to a comparison register
- // that we can then use for branches.
- if (isFloat)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::FMSTAT)));
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1164,7 +1295,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
MVT SourceVT;
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
@@ -1176,7 +1307,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
@@ -1184,6 +1315,12 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
}
unsigned CmpReg = getRegForValue(BI->getCondition());
@@ -1196,7 +1333,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1206,7 +1343,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1214,70 +1351,155 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
+bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0) return false;
- MVT VT;
- Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
+ unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(AddrReg));
+ return true;
+}
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcVT = TLI.getValueType(Ty, true);
+ if (!SrcVT.isSimple()) return false;
+
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // then a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
+ isNegativeImm = true;
+ Imm = -Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ UseImm = true;
+ }
+
unsigned CmpOpc;
- unsigned CondReg;
- switch (VT.SimpleTy) {
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
- CmpOpc = ARM::VCMPES;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
- CmpOpc = ARM::VCMPED;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- CondReg = ARM::CPSR;
+ if (isThumb2) {
+ if (!UseImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ } else {
+ if (!UseImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ }
break;
}
- // Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
- // We may not handle every CC for now.
- if (ARMPred == ARMCC::AL) return false;
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (SrcReg1 == 0) return false;
+ if (!UseImm) {
+ SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (SrcReg2 == 0) return false;
+ }
+ }
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
+ if (!UseImm) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(SrcReg1);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(Imm);
+ AddOptionalDefs(MIB);
+ }
// For floating point we need to move the result to a comparison register
// that we can then use for branches.
- if (isFloat)
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::FMSTAT)));
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
// Now set a register based on the comparison. Explicitly set the predicates
// here.
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
- TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass;
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned DestReg = createResultReg(RC);
- Constant *Zero
- = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
- .addImm(ARMPred).addReg(CondReg);
+ .addImm(ARMPred).addReg(ARM::CPSR);
UpdateValueMap(I, DestReg);
return true;
@@ -1321,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1330,21 +1552,30 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
if (!isTypeLegal(Ty, DstVT))
return false;
- // FIXME: Handle sign-extension where necessary.
- if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- unsigned Op = getRegForValue(I->getOperand(0));
- if (Op == 0) return false;
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ EVT DestVT = MVT::i32;
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ /*isZExt*/!isSigned);
+ if (SrcReg == 0) return false;
+ }
// The conversion routine works on fp-reg to fp-reg and the operand above
// was an integer, move it to the fp registers if possible.
- unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
if (FP == 0) return false;
unsigned Opc;
- if (Ty->isFloatTy()) Opc = ARM::VSITOS;
- else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
+ else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1355,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1369,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
- if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
- else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
+ else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
- // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
ResultReg)
@@ -1401,22 +1632,54 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
if (CondReg == 0) return false;
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
- unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ // Check to see if we can use an immediate in the conditional move.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
+ assert (VT == MVT::i32 && "Expecting an i32.");
+ Imm = (int)ConstInt->getValue().getZExtValue();
+ if (Imm < 0) {
+ isNegativeImm = true;
+ Imm = ~Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+
+ unsigned Op2Reg = 0;
+ if (!UseImm) {
+ Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+ }
+
+ unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CondReg).addImm(1));
+ .addReg(CondReg).addImm(0));
+
+ unsigned MovCCOpc;
+ if (!UseImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
+ } else {
+ if (!isNegativeImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ } else {
+ MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
+ }
+ }
unsigned ResultReg = createResultReg(RC);
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg)
- .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::SelectSDiv(const Instruction *I) {
+bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1430,21 +1693,21 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) {
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SDIV_I8;
+ LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SDIV_I16;
+ LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SDIV_I32;
+ LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SDIV_I64;
+ LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SDIV_I128;
+ LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectSRem(const Instruction *I) {
+bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1452,21 +1715,59 @@ bool ARMFastISel::SelectSRem(const Instruction *I) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SREM_I8;
+ LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SREM_I16;
+ LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SREM_I32;
+ LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SREM_I64;
+ LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SREM_I128;
+ LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
+ break;
+ case ISD::OR:
+ Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
+ break;
+ case ISD::SUB:
+ Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // TODO: Often the 2nd operand is an immediate, which can be encoded directly
+ // in the instruction, rather then materializing the value in a register.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
EVT VT = TLI.getValueType(I->getType(), true);
// We can get here in the case when we want to use NEON for our fp
@@ -1478,12 +1779,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
if (isFloat && !Subtarget->hasVFP2())
return false;
- unsigned Op1 = getRegForValue(I->getOperand(0));
- if (Op1 == 0) return false;
-
- unsigned Op2 = getRegForValue(I->getOperand(1));
- if (Op2 == 0) return false;
-
unsigned Opc;
bool is64bit = VT == MVT::f64 || VT == MVT::i64;
switch (ISDOpcode) {
@@ -1498,6 +1793,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
Opc = is64bit ? ARM::VMULD : ARM::VMULS;
break;
}
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
@@ -1508,18 +1809,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
- EVT SrcVT, unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
- Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
- return false;
-}
-
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
@@ -1536,7 +1825,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1548,11 +1837,6 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
- case CallingConv::GHC:
- if (Return)
- llvm_unreachable("Can't return in GHC call convention");
- else
- return CC_ARM_APCS_GHC;
}
}
@@ -1567,6 +1851,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At the point, we are able to handle the call's arguments in fast isel.
+
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1582,41 +1908,26 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON/vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
- return false;
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
break;
}
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
- break;
- }
- case CCValAssign::AExt: {
- bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -1634,16 +1945,17 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg())
- .addReg(Arg);
+ .addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
- if (VA.getLocVT() != MVT::f64) return false;
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
CCValAssign &NextVA = ArgLocs[++i];
- // TODO: Only handle register args for now.
- if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1659,9 +1971,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
Addr.Base.Reg = ARM::SP;
Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
}
}
+
return true;
}
@@ -1685,7 +1999,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// For this move we copy into two registers and then move into the
// double fp reg we want.
EVT DestVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVDRR), ResultReg)
@@ -1700,7 +2014,12 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
EVT CopyVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
unsigned ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1753,13 +2072,26 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVVT = TLI.getValueType(RV->getType());
+ EVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ // Perform extension if flagged as either zext or sext. Otherwise, do
+ // nothing.
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
+ if (SrcReg == 0) return false;
+ }
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
@@ -1772,20 +2104,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
MRI.addLiveOut(VA.getLocReg());
}
- unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(RetOpc)));
return true;
}
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
-
- // Darwin needs the r9 versions of the opcodes.
- bool isDarwin = Subtarget->isTargetDarwin();
- if (isThumb) {
- return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ if (isThumb2) {
+ return ARM::tBL;
} else {
- return isDarwin ? ARM::BLr9 : ARM::BL;
+ return ARM::BL;
}
}
@@ -1844,11 +2173,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
- if(isThumb)
+ if (isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
@@ -1863,6 +2191,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1873,12 +2205,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return true;
}
-bool ARMFastISel::SelectCall(const Instruction *I) {
+bool ARMFastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = 0) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
- // Can't handle inline asm or worry about intrinsics yet.
- if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee)) return false;
// Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@@ -1902,7 +2235,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
// TODO: For now if we have long calls specified we don't handle the call.
@@ -1913,16 +2247,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgRegs.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
+ // If we're lowering a memory intrinsic instead of a regular call, skip the
+ // last two arguments, which shouldn't be passed to the underlying function.
+ if (IntrMemName && e-i <= 2)
+ break;
- if (Arg == 0)
- return false;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1930,7 +2266,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- // FIXME: Only handle *easy* calls for now.
+ // FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
@@ -1939,8 +2275,14 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
Type *ArgTy = (*i)->getType();
MVT ArgVT;
- if (!isTypeLegal(ArgTy, ArgVT))
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
@@ -1956,26 +2298,38 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb)
+ if(isThumb2) {
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)))
- .addGlobalAddress(GV, 0, 0);
- else
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0));
-
+ TII.get(CallOpc)));
+ if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
+ } else {
+ if (!IntrMemName)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+ else
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(IntrMemName, 0));
+ }
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1984,83 +2338,187 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
+}
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+ return Len <= 16;
}
-bool ARMFastISel::SelectIntCast(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Op = I->getOperand(0);
- Type *SrcTy = Op->getType();
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!ARMIsMemCpySmall(Len))
+ return false;
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
- if (isa<TruncInst>(I)) {
- if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
- return false;
- if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ bool RV;
+ unsigned ResultReg;
+ RV = ARMEmitLoad(VT, ResultReg, Src);
+ assert (RV == true && "Should be able to handle this load.");
+ RV = ARMEmitStore(VT, ResultReg, Dest);
+ assert (RV == true && "Should be able to handle this store.");
+ (void)RV;
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ // Don't handle volatile.
+ if (MTI.isVolatile())
return false;
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // we would emit dead code because we don't currently handle memmoves.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ // Small memcpy's are common enough that we want to do them without a call
+ // if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemCpySmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ return true;
+ }
+ }
+
+ if (!MTI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ return false;
- // Because the high bits are undefined, a truncate doesn't generate
- // any code.
- UpdateValueMap(I, SrcReg);
- return true;
+ const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+ return SelectCall(&I, IntrMemName);
}
- if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+ // Don't handle volatile.
+ if (MSI.isVolatile())
+ return false;
+
+ if (!MSI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return SelectCall(&I, "memset");
+ }
+ }
+}
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+
unsigned Opc;
- bool isZext = isa<ZExtInst>(I);
bool isBoolZext = false;
- if (!SrcVT.isSimple())
- return false;
+ if (!SrcVT.isSimple()) return 0;
switch (SrcVT.getSimpleVT().SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i16:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
- Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
+ Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case MVT::i8:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
- Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
+ Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case MVT::i1:
- if (isZext) {
- Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ if (isZExt) {
+ Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
}
- return false;
+ return 0;
}
- // FIXME: We could save an instruction in many cases by special-casing
- // load instructions.
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
if (isBoolZext)
MIB.addImm(1);
else
MIB.addImm(0);
AddOptionalDefs(MIB);
- UpdateValueMap(I, DestReg);
+ return ResultReg;
+}
+
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
return true;
}
@@ -2074,6 +2532,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
case Instruction::ICmp:
case Instruction::FCmp:
return SelectCmp(I);
@@ -2082,42 +2542,105 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::SIToFP:
- return SelectSIToFP(I);
+ return SelectIToFP(I, /*isSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*isSigned*/ false);
case Instruction::FPToSI:
- return SelectFPToSI(I);
+ return SelectFPToI(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*isSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::FAdd:
- return SelectBinaryOp(I, ISD::FADD);
+ return SelectBinaryFPOp(I, ISD::FADD);
case Instruction::FSub:
- return SelectBinaryOp(I, ISD::FSUB);
+ return SelectBinaryFPOp(I, ISD::FSUB);
case Instruction::FMul:
- return SelectBinaryOp(I, ISD::FMUL);
+ return SelectBinaryFPOp(I, ISD::FMUL);
case Instruction::SDiv:
- return SelectSDiv(I);
+ return SelectDiv(I, /*isSigned*/ true);
+ case Instruction::UDiv:
+ return SelectDiv(I, /*isSigned*/ false);
case Instruction::SRem:
- return SelectSRem(I);
+ return SelectRem(I, /*isSigned*/ true);
+ case Instruction::URem:
+ return SelectRem(I, /*isSigned*/ false);
case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return SelectIntrinsicCall(*II);
return SelectCall(I);
case Instruction::Select:
return SelectSelect(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
+ return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
- return SelectIntCast(I);
+ return SelectIntExt(I);
default: break;
}
return false;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// successful.
+bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ // ldrb r1, [r0] ldrb r1, [r0]
+ // uxtb r2, r1 =>
+ // mov r3, r2 mov r3, r1
+ bool isZExt = true;
+ switch(MI->getOpcode()) {
+ default: return false;
+ case ARM::SXTH:
+ case ARM::t2SXTH:
+ isZExt = false;
+ case ARM::UXTH:
+ case ARM::t2UXTH:
+ if (VT != MVT::i16)
+ return false;
+ break;
+ case ARM::SXTB:
+ case ARM::t2SXTB:
+ isZExt = false;
+ case ARM::UXTB:
+ case ARM::t2UXTB:
+ if (VT != MVT::i8)
+ return false;
+ break;
+ }
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ return false;
+ MI->eraseFromParent();
+ return true;
+}
+
namespace llvm {
- llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
- // Completely untested on non-darwin.
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
!DisableARMFastISel)
return new ARMFastISel(funcInfo);
return 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 412751b84d41..402ecb0c5ffd 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,31 +15,40 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+ cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs);
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetDarwin())
+ // iOS requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetIOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -72,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
@@ -81,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
- const unsigned *CSRegs) {
+ const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
if (MI->getOpcode() == ARM::LDMIA_RET ||
MI->getOpcode() == ARM::t2LDMIA_RET ||
@@ -140,10 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// belongs to which callee-save spill areas.
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
-
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
+ int D8SpillFI = 0;
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
@@ -177,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -186,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
}
}
@@ -195,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (GPRCS1Size > 0) MBBI++;
// Set FP to point to the stack slot that contains the previous FP.
- // For Darwin, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
bool HasFP = hasFP(MF);
@@ -232,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI++;
}
- NumBytes = DPRCSOffset;
+ // Move past the aligned DPRCS2 area.
+ if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+ MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+ // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+ // leaves the stack pointer pointing to the DPRCS2 area.
+ //
+ // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+ NumBytes += MFI->getObjectOffset(D8SpillFI);
+ } else
+ NumBytes = DPRCSOffset;
+
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -259,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// If we need dynamic stack realignment, do it here. Be paranoid and make
// sure if we also have VLAs, we have a base pointer for frame access.
- if (RegInfo->needsStackRealignment(MF)) {
+ // If aligned NEON registers were spilled, the stack has already been
+ // realigned.
+ if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
@@ -315,8 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -332,16 +354,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
-
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
--MBBI;
@@ -365,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARMCC::AL, 0, TII);
else {
// It's not possible to restore SP from FP in a single instruction.
- // For Darwin, this looks like:
+ // For iOS, this looks like:
// mov sp, r7
// sub sp, #24
// This is bad, if an interrupt is taken after the mov, sp is in an
@@ -404,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
- unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
- ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
- : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode = STI.isThumb() ?
+ (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+ ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -431,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
- } else if (RetOpcode == ARM::TCRETURNriND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
- addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = prior(MBBI);
@@ -481,6 +494,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
return Offset - AFI->getDPRCalleeSavedAreaOffset();
+ // SP can move around if there are allocas. We may also lose track of SP
+ // when emergency spilling inside a non-reserved call frame setup.
+ bool hasMovingSP = !hasReservedCallFrame(MF);
+
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
@@ -488,7 +505,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
@@ -500,11 +517,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (hasFP(MF) && AFI->hasStackFrame()) {
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs (and thus the SP isn't reliable as a base).
- if (isFixed || (MFI->hasVarSizedObjects() &&
- !RegInfo->hasBasePointer(MF))) {
+ if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
if (AFI->isThumb2Function()) {
// Try to use the frame pointer if we can, else use the base pointer
@@ -551,6 +567,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned StmOpc, unsigned StrOpc,
bool NoGap,
bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
@@ -564,7 +581,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // D-registers in the aligned area DPRCS2 are NOT spilled here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
// Add the callee-saved register as live-in unless it's LR and
// @llvm.returnaddress is called. If LR is returned for
@@ -614,16 +635,15 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
unsigned LdmOpc, unsigned LdrOpc,
bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const {
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri ||
- RetOpcode == ARM::TCRETURNriND);
+ RetOpcode == ARM::TCRETURNri);
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -632,7 +652,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // The aligned reloads from area DPRCS2 are not inserted here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
Reg = ARM::PC;
@@ -686,6 +710,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
}
}
+/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. Also insert stack realignment code and leave the stack
+/// pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // Mark the D-register spill slots as properly aligned. Since MFI computes
+ // stack slot layout backwards, this can actually mean that the d-reg stack
+ // slot offsets can be wrong. The offset for d8 will always be correct.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned DNum = CSI[i].getReg() - ARM::D8;
+ if (DNum >= 8)
+ continue;
+ int FI = CSI[i].getFrameIdx();
+ // The even-numbered registers will be 16-byte aligned, the odd-numbered
+ // registers will be 8-byte aligned.
+ MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+ // The stack slot for D8 needs to be maximally aligned because this is
+ // actually the point where we align the stack pointer. MachineFrameInfo
+ // computes all offsets relative to the incoming stack pointer which is a
+ // bit weird when realigning the stack. Any extra padding for this
+ // over-alignment is not realized because the code inserted below adjusts
+ // the stack pointer by numregs * 8 before aligning the stack pointer.
+ if (DNum == 0)
+ MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ }
+
+ // Move the stack pointer to the d8 spill slot, and align it at the same
+ // time. Leave the stack slot address in the scratch register r4.
+ //
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ //
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+ AFI->setShouldRestoreSPFromFP(true);
+
+ // sub r4, sp, #numregs * 8
+ // The immediate is <= 64, so it doesn't need any special encoding.
+ unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
+
+ // bic r4, r4, #align-1
+ Opc = isThumb ? ARM::t2BICri : ARM::BICri;
+ unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+
+ // mov sp, r4
+ // The stack pointer must be adjusted before spilling anything, otherwise
+ // the stack slots could be clobbered by an interrupt handler.
+ // Leave r4 live, it is used below.
+ Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+ .addReg(ARM::R4);
+ MIB = AddDefaultPred(MIB);
+ if (!isThumb)
+ AddDefaultCC(MIB);
+
+ // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+ // r4 holds the stack slot address.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+ // The writeback is only needed when emitting two vst1.64 instructions.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+ ARM::R4)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4).addImm(16).addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vst1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vstr.64 for the odd last register.
+ if (NumAlignedDPRCS2Regs) {
+ MBB.addLiveIn(NextReg);
+ // vstr.64 uses addrmode5 which has an offset scale of 4.
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ }
+
+ // The last spill instruction inserted should kill the scratch register r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
+/// iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+ // These switches all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+ // Last store kills r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -700,12 +965,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned PushOneOpc = AFI->isThumbFunction() ?
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- MachineInstr::FrameSetup);
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
return true;
}
@@ -720,15 +992,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register);
+ &isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register);
+ &isARMArea1Register, 0);
return true;
}
@@ -852,6 +1131,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
return Limit;
}
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!RegInfo->canRealignStack(MF))
+ return;
+
+ // We always spill contiguous d-registers starting from d8. Count how many
+ // needs spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
+
void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -898,28 +1226,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUsed(ARM::R4);
}
+ // See if we can spill vector registers to aligned stack.
+ checkNumAlignedDPRCS2Regs(MF);
+
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases =
- RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
- if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
- Spilled = true;
- CanEliminateFrame = false;
- }
- }
}
if (!ARM::GPRRegisterClass->contains(Reg))
@@ -928,7 +1250,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -948,7 +1270,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 61bb8afa40f2..a1c2b93562c9 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -63,12 +63,13 @@ public:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
unsigned StrOpc, bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags = 0) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const;
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
deleted file mode 100644
index 5f863ea241ca..000000000000
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass merges globals with internal linkage into one. This way all the
-// globals which were merged into a biggest one can be addressed using offsets
-// from the same base pointer (no need for separate base pointer for each of the
-// global). Such a transformation can significantly reduce the register pressure
-// when many globals are involved.
-//
-// For example, consider the code which touches several global variables at
-// once:
-//
-// static int foo[N], bar[N], baz[N];
-//
-// for (i = 0; i < N; ++i) {
-// foo[i] = bar[i] * baz[i];
-// }
-//
-// On ARM the addresses of 3 arrays should be kept in the registers, thus
-// this code has quite large register pressure (loop body):
-//
-// ldr r1, [r5], #4
-// ldr r2, [r6], #4
-// mul r1, r2, r1
-// str r1, [r0], #4
-//
-// Pass converts the code to something like:
-//
-// static struct {
-// int foo[N];
-// int bar[N];
-// int baz[N];
-// } merged;
-//
-// for (i = 0; i < N; ++i) {
-// merged.foo[i] = merged.bar[i] * merged.baz[i];
-// }
-//
-// and in ARM code this becomes:
-//
-// ldr r0, [r5, #40]
-// ldr r1, [r5, #80]
-// mul r0, r1, r0
-// str r0, [r5], #4
-//
-// note that we saved 2 registers here almostly "for free".
-// ===---------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-global-merge"
-#include "ARM.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-using namespace llvm;
-
-namespace {
- class ARMGlobalMerge : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// target type sizes.
- const TargetLowering *TLI;
-
- bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const;
-
- public:
- static char ID; // Pass identification, replacement for typeid.
- explicit ARMGlobalMerge(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
-
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
-
- const char *getPassName() const {
- return "Merge internal globals";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- struct GlobalCmp {
- const TargetData *TD;
-
- GlobalCmp(const TargetData *td) : TD(td) { }
-
- bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
- }
- };
- };
-} // end anonymous namespace
-
-char ARMGlobalMerge::ID = 0;
-
-bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const {
- const TargetData *TD = TLI->getTargetData();
-
- // FIXME: Infer the maximum possible offset depending on the actual users
- // (these max offsets are different for the users inside Thumb or ARM
- // functions)
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
- // FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
-
- Type *Int32Ty = Type::getInt32Ty(M.getContext());
-
- for (size_t i = 0, e = Globals.size(); i != e; ) {
- size_t j = 0;
- uint64_t MergedSize = 0;
- std::vector<Type*> Tys;
- std::vector<Constant*> Inits;
- for (j = i; j != e; ++j) {
- Type *Ty = Globals[j]->getType()->getElementType();
- MergedSize += TD->getTypeAllocSize(Ty);
- if (MergedSize > MaxOffset) {
- break;
- }
- Tys.push_back(Ty);
- Inits.push_back(Globals[j]->getInitializer());
- }
-
- StructType *MergedTy = StructType::get(M.getContext(), Tys);
- Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
- GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals");
- for (size_t k = i; k < j; ++k) {
- Constant *Idx[2] = {
- ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, k-i)
- };
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
- Globals[k]->replaceAllUsesWith(GEP);
- Globals[k]->eraseFromParent();
- }
- i = j;
- }
-
- return true;
-}
-
-
-bool ARMGlobalMerge::doInitialization(Module &M) {
- SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
- const TargetData *TD = TLI->getTargetData();
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
- bool Changed = false;
-
- // Grab all non-const globals.
- for (Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- // Merge is safe for "normal" internal globals only
- if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
- continue;
-
- // Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
- Type *Ty = I->getType()->getElementType();
- if (Alignment > TD->getABITypeAlignment(Ty))
- continue;
-
- // Ignore all 'special' globals.
- if (I->getName().startswith("llvm.") ||
- I->getName().startswith(".llvm."))
- continue;
-
- if (TD->getTypeAllocSize(Ty) < MaxOffset) {
- const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
- if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
- BSSGlobals.push_back(I);
- else if (I->isConstant())
- ConstGlobals.push_back(I);
- else
- Globals.push_back(I);
- }
- }
-
- if (Globals.size() > 1)
- Changed |= doMerge(Globals, M, false);
- if (BSSGlobals.size() > 1)
- Changed |= doMerge(BSSGlobals, M, false);
-
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
-
- return Changed;
-}
-
-bool ARMGlobalMerge::runOnFunction(Function &F) {
- return false;
-}
-
-FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
- return new ARMGlobalMerge(tli);
-}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a279187..a5fd15b6bb97 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d8566b..98bfc4cf0cc5 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5ee009c04c5b..1eafbbc8f64d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,6 +244,7 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -923,7 +924,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
// The maximum alignment is equal to the memory size being referenced.
unsigned LSNAlign = LSN->getAlignment();
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign > MemSize && MemSize > 1)
+ if (LSNAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
@@ -1549,6 +1550,52 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+ case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+ case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+ case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+ case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+ case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+ case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+ case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+
+ case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+ case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+ case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+ case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+ case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+ case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+ case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+ case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
+ case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
+ case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
+ case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
+ case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
@@ -1612,7 +1659,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1754,7 +1809,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
@@ -1977,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
+ // fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2116,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
default:
llvm_unreachable("Unknown so_reg opcode!");
- break;
}
SDValue SOShImm =
CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
@@ -2227,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
if (Subtarget->isThumb()) {
SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
@@ -2286,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
+ default: llvm_unreachable("Illegal conditional move type!");
case MVT::i32:
Opc = Subtarget->isThumb()
? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
@@ -2303,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ SDValue CCR = N->getOperand(3);
+ assert(CCR.getOpcode() == ISD::Register);
+ SDValue InFlag = N->getOperand(4);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if (Subtarget->isThumb()) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+}
+
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2316,7 +2489,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc0 = N->getOperand(0);
SDValue XORSrc1 = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (DisableARMIntABS)
@@ -2641,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
+ case ARMISD::CAND:
+ case ARMISD::COR:
+ case ARMISD::CXOR:
+ return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -2649,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
@@ -2668,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
@@ -2715,8 +2893,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
- ARM::VLD2DUPd32Pseudo };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
@@ -2733,8 +2911,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
- ARM::VLD2DUPd32Pseudo_UPD };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
@@ -2751,24 +2929,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD1_UPD: {
- unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
- ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
- ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
- ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
@@ -2780,7 +2963,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD4_UPD: {
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
@@ -2815,24 +2998,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST1_UPD: {
- unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
- ARM::VST1d32_UPD, ARM::VST1d64_UPD };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
- ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2844,7 +3032,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
@@ -2993,14 +3181,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
- ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
- ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3054,14 +3242,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
- ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
- ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3122,14 +3310,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
case Intrinsic::arm_neon_vtbl3:
return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
case Intrinsic::arm_neon_vtbx3:
return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
@@ -3163,7 +3351,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
Ops.data(), Ops.size());
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f60d177a920b..a103c94cede4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
-#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -40,18 +39,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <sstream>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -73,7 +69,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -89,7 +85,7 @@ namespace llvm {
}
// The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -108,8 +104,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- if (ElemTy != MVT::i32) {
+ if (ElemTy == MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ } else {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
@@ -121,18 +123,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction(VT.getSimpleVT(),
- (MVT::SimpleValueType)InnerVT, Expand);
}
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -263,7 +259,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
@@ -388,8 +384,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Long long helper functions
// RTABI chapter 4.2, Table 9
setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
@@ -405,21 +399,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
// Memory operations
// RTABI chapter 4.3.4
setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
@@ -433,7 +434,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -441,6 +443,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -457,13 +472,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+ // supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are expanded always, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In another words, find a way when "copysign" appears in DAG with vector
+ // operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -476,13 +501,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
- setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -498,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
- // a destination type that is wider than the source.
+ // a destination type that is wider than the source, and nor does
+ // it have a FP_TO_[SU]INT instruction with a narrower destination than
+ // source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -519,6 +558,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16,
+ MVT::v2i32};
+ for (unsigned i = 0; i < 6; ++i) {
+ setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ }
}
computeRegisterProperties();
@@ -576,6 +625,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -606,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
+
+ if (!Subtarget->isTargetDarwin()) {
+ // Non-Darwin platforms may return values in these registers via the
+ // personality function.
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+ }
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
@@ -664,7 +722,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -676,7 +735,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
@@ -703,18 +761,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -735,20 +796,27 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
- if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
- setTargetDAGCombine(ISD::OR);
- if (Subtarget->hasNEON())
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+ }
+
+ if (Subtarget->hasV6Ops())
+ setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+ maxStoresPerMemset = 16;
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -828,7 +896,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CAND: return "ARMISD::CAND";
+ case ARMISD::COR: return "ARMISD::COR";
+ case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -851,7 +923,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
- case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -899,6 +970,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -949,7 +1021,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -984,7 +1056,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
- return Sched::Latency;
+ return Sched::ILP;
}
if (!N->isMachineOpcode())
@@ -999,7 +1071,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
- return Sched::Latency;
+ return Sched::ILP;
return Sched::RegPressure;
}
@@ -1081,18 +1153,19 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ // Fallthrough
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
- case CallingConv::GHC:
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -1215,7 +1288,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1334,7 +1407,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1350,12 +1423,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
- // TODO: Disable AlwaysInline when it becomes possible
- // to emit a nested call sequence.
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
- /*AlwaysInline=*/true,
+ /*AlwaysInline=*/false,
MachinePointerInfo(0),
MachinePointerInfo(0)));
@@ -1429,7 +1500,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1444,7 +1515,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1465,7 +1536,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1494,7 +1565,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1513,12 +1584,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
+ else if (doesNotRet && isDirect && !isARMFunc &&
+ Subtarget->hasRAS() && !Subtarget->isThumb1Only())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- CallOpc = (isDirect || Subtarget->hasV5TOps())
- ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
- : ARMISD::CALL_NOLINK;
+ if (!isDirect && !Subtarget->hasV5TOps()) {
+ CallOpc = ARMISD::CALL_NOLINK;
+ } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
@@ -1531,6 +1610,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -1558,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
/// and then confiscate the rest of the parameter registers to insure
/// this.
void
-llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
@@ -1588,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
- const ARMInstrInfo *TII) {
+ const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
@@ -1723,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const ARMInstrInfo *TII =
- ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -1852,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return result;
}
-bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
- unsigned NumCopies = 0;
- SDNode* Copies[2];
- SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::CopyToReg) {
- Copies[NumCopies++] = Use;
- } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
- for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
- Copies[UI.getUse().getResNo()] = *UI;
- ++NumCopies;
+ Copies.insert(*UI);
}
- } else if (Use->getOpcode() == ISD::BITCAST) {
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
- if (!Use->hasNUsesOfValue(1, 0))
+ if (!Copy->hasOneUse())
return false;
- Use = *Use->use_begin();
- if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Copies[NumCopies++] = Use;
+ Chain = Copy->getOperand(0);
} else {
return false;
}
- if (NumCopies != 1 && NumCopies != 2)
- return false;
-
bool HasRet = false;
- for (unsigned i = 0; i < NumCopies; ++i) {
- SDNode *Copy = Copies[i];
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- SDNode *Use = *UI;
- if (Use == Copies[0] || Use == Copies[1])
- continue;
- return false;
- }
- if (UI->getOpcode() != ARMISD::RET_FLAG)
- return false;
- HasRet = true;
- }
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls)
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
return false;
if (!CI->isTailCall())
@@ -1965,7 +2058,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1989,7 +2082,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2005,7 +2098,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -2037,7 +2131,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2045,7 +2139,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV =
@@ -2054,7 +2148,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2092,13 +2186,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2115,7 +2210,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
}
@@ -2128,7 +2223,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- // FIXME: Enable this for static codegen when tool issues are fixed.
+ // FIXME: Enable this for static codegen when tool issues are fixed. Also
+ // update ARMFastISel::ARMMaterializeGV.
if (Subtarget->useMovt() && RelocM != Reloc::Static) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -2143,7 +2239,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2163,7 +2260,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -2173,7 +2270,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, 0);
+ false, false, false, 0);
return Result;
}
@@ -2195,20 +2292,12 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
- const {
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -2253,7 +2342,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2366,7 +2455,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2385,7 +2474,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -2452,7 +2541,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SmallVector<SDValue, 4> MemOps;
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2521,7 +2610,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -2535,7 +2624,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
RC = ARM::SPRRegisterClass;
@@ -2612,7 +2701,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -2777,6 +2866,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
+ // undefined bits before doing a full-word comparison with zero.
+ Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
return DAG.getSelectCC(dl, Cond,
DAG.getConstant(0, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
@@ -2847,7 +2941,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
llvm_unreachable("Unknown VFP cmp argument!");
}
@@ -2866,7 +2960,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
@@ -2876,7 +2970,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
- NewAlign);
+ Ld->isInvariant(), NewAlign);
return;
}
@@ -2894,12 +2988,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
- bool SeenZero = false;
- if (canChangeToInt(LHS, SeenZero, Subtarget) &&
- canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case since
- // we only care about equality comparisons.
- (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ bool LHSSeenZero = false;
+ bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
+ bool RHSSeenZero = false;
+ bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
+ if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2908,10 +3001,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
+ SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
- LHS = bitcastf32Toi32(LHS, DAG);
- RHS = bitcastf32Toi32(RHS, DAG);
+ LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(LHS, DAG), Mask);
+ RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
@@ -2922,6 +3018,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS1, RHS2;
expandf64Toi32(LHS, DAG, LHS1, LHS2);
expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
+ RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -2950,7 +3048,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3000,25 +3098,48 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(),
- false, false, 0);
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(), false, false, 0);
+ MachinePointerInfo::getJumpTable(),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
}
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+}
+
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::FP_TO_SINT:
Opc = ARMISD::FTOSI;
break;
@@ -3034,6 +3155,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+ if (VT.getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4f32)
@@ -3042,8 +3169,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned CastOpc;
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
CastOpc = ISD::SIGN_EXTEND;
Opc = ISD::SINT_TO_FP;
@@ -3067,8 +3193,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
Opc = ARMISD::SITOF;
break;
@@ -3176,7 +3301,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -3197,7 +3322,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -3442,7 +3567,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal FP comparison"); break;
+ default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE: Invert = true; // Fallthrough
case ISD::SETOEQ:
@@ -3481,7 +3606,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
} else {
// Integer comparisons.
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal integer comparison"); break;
+ default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true;
@@ -3688,14 +3813,65 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
default:
llvm_unreachable("unexpected size for isNEONModifiedImm");
- return SDValue();
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
-static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal != -1) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // If that fails, try a VMOV.i32
+ EVT VMovVT;
+ unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
+ SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
+ VMOVModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
+ VMVNModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+
+static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
@@ -3734,8 +3910,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
-static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned BlockSize) {
+static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
@@ -3761,15 +3936,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask
// of 8 elements can work here.
return VT == MVT::v8i8 && M.size() == 8;
}
-static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3787,8 +3961,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
-static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3803,8 +3976,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3827,8 +3999,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
-static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3852,8 +4023,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3878,8 +4048,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
-static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3955,6 +4124,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
}
}
@@ -4302,7 +4480,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
}
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
- SmallVectorImpl<int> &ShuffleMask,
+ ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
@@ -4310,7 +4488,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
DebugLoc DL = Op.getDebugLoc();
SmallVector<SDValue, 8> VTBLMask;
- for (SmallVectorImpl<int>::iterator
+ for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
@@ -4330,7 +4508,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- SmallVector<int, 8> ShuffleMask;
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
@@ -4338,7 +4515,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of inconsistencies between legalization and selection.
// FIXME: floating-point vectors should be canonicalized to integer vectors
// of the same time so that they get CSEd properly.
- SVN->getMask(ShuffleMask);
+ ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 32) {
@@ -4347,9 +4524,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
+ // Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i32));
}
@@ -4450,6 +4642,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // INSERT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ return Op;
+}
+
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
@@ -4526,11 +4727,10 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
- int64_t SExtVal = C->getSExtValue();
- if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
- if ((C->getZExtValue() >> HalfSize) != 0)
+ if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
@@ -4569,7 +4769,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4874,7 +5075,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = ARMISD::ADDC; break;
case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
case ISD::SUBC: Opc = ARMISD::SUBC; break;
@@ -4959,7 +5160,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
- case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
@@ -4971,8 +5171,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -4986,7 +5188,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
}
- return SDValue();
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -4998,7 +5199,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- break;
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
@@ -5194,7 +5394,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5304,7 +5504,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5414,7 +5614,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
@@ -5500,52 +5700,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- MachineFunction &MF = *MI->getParent()->getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
- if (!RI.hasBasePointer(MF)) return;
-
- MachineBasicBlock::iterator MBBI = MI;
-
- int32_t NumBytes = AFI->getFramePtrSpillOffset();
- unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
- "Base pointer without frame pointer?");
-
- if (AFI->isThumb2Function())
- llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
- else if (AFI->isThumbFunction())
- llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *AII, RI);
- else
- llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
- if (!RI.needsStackRealignment(MF)) return;
-
- // If there's dynamic realignment, adjust for it.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- assert(!AFI->isThumb1OnlyFunction());
-
- // Emit bic r6, r6, MaxAlign
- unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)));
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -5580,8 +5734,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore, 4, 4);
- EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
@@ -5683,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
MachineModuleInfo &MMI = MF->getMMI();
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
if (!BB->isLandingPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
@@ -5741,12 +5894,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
- // Insert and renumber MBBs.
- MachineBasicBlock *Last = &MF->back();
+ // Insert and MBBs.
MF->insert(MF->end(), DispatchBB);
MF->insert(MF->end(), DispContBB);
MF->insert(MF->end(), TrapBB);
- MF->RenumberBlocks(Last);
// Insert code into the entry block that creates and registers the function
// context.
@@ -5757,35 +5908,63 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
+ if (AFI->isThumb1OnlyFunction())
+ BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
+ else if (!Subtarget->hasVFP2())
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
+ else
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ } else {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
.addJumpTableIndex(MJTI)
.addImm(UId));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
- .addReg(NewVReg2, RegState::Kill)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ .addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
- .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
.addJumpTableIndex(MJTI)
.addImm(UId);
@@ -5796,9 +5975,30 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(1)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
@@ -5847,38 +6047,77 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId));
MachineMemOperand *JTMMOLd =
MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
MachineMemOperand::MOLoad, 4, 4);
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3)
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
.addImm(0)
.addMemOperand(JTMMOLd));
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg3)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId);
}
@@ -5893,21 +6132,24 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
PrevMBB = CurMBB;
}
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
- const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
MachineBasicBlock *BB = *I;
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
- for (MachineBasicBlock::succ_iterator
- SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SMBB = *SI;
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ while (!Successors.empty()) {
+ MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isLandingPad()) {
BB->removeSuccessor(SMBB);
- SMBB->setIsLandingPad(false);
+ MBBLPads.push_back(SMBB);
}
}
@@ -5919,7 +6161,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -5932,15 +6174,31 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineInstrBuilder MIB(&*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
- if (!TRC->contains(SavedRegs[i])) continue;
- if (!DefRegs[SavedRegs[i]])
- MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ unsigned Reg = SavedRegs[i];
+ if (Subtarget->isThumb2() &&
+ !ARM::tGPRRegisterClass->contains(Reg) &&
+ !ARM::hGPRRegisterClass->contains(Reg))
+ continue;
+ else if (Subtarget->isThumb1Only() &&
+ !ARM::tGPRRegisterClass->contains(Reg))
+ continue;
+ else if (!Subtarget->isThumb() &&
+ !ARM::GPRRegisterClass->contains(Reg))
+ continue;
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+ (*I)->setIsLandingPad(false);
+
// The instruction is gone now.
MI->eraseFromParent();
@@ -6222,20 +6480,28 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ EmitSjLjDispatchBlock(MI, BB);
+ return BB;
+
case ARM::ABS:
case ARM::t2ABS: {
// To insert an ABS instruction, we have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// source vreg to test against 0, the destination vreg to set,
// the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
+ // true/false values to select between, and a branch opcode to use.
// It transforms
// V1 = ABS V0
// into
// V2 = MOVS V0
// BCC (branch to SinkBB if V0 >= 0)
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
+ // SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator BBI = BB;
++BBI;
@@ -6276,19 +6542,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(ARM::CPSR, RegState::Define);
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
+ BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
// insert rsbri in RSBBB
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
.addReg(NewMovDstReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- // insert PHI in SinkBB,
+ // insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
@@ -6296,7 +6562,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NewMovDstReg).addMBB(BB);
// remove ABS instruction
- MI->eraseFromParent();
+ MI->eraseFromParent();
// return last added BB
return SinkBB;
@@ -6306,32 +6572,40 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
- // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+ // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
- MI->setDesc(TII->get(NewOpc));
+ MCID = &TII->get(NewOpc);
+
+ assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ "converted opcode should be the same except for cc_out");
+
+ MI->setDesc(*MCID);
+
+ // Add the optional cc_out operand
+ MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
- unsigned ccOutIdx = MCID.getNumOperands() - 1;
+ unsigned ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -6339,7 +6613,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
- for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
@@ -6513,7 +6787,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
default:
- assert(0 && "Invalid vector element type for padd optimization.");
+ llvm_unreachable("Invalid vector element type for padd optimization.");
}
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
@@ -6632,41 +6906,115 @@ static SDValue PerformMULCombine(SDNode *N,
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
+ int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
DebugLoc DL = N->getDebugLoc();
SDValue Res;
MulAmt >>= ShiftAmt;
- if (isPowerOf2_32(MulAmt - 1)) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- Res = DAG.getNode(ISD::ADD, DL, VT,
- V, DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt-1),
- MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt+1),
- MVT::i32)),
- V);
- } else
- return SDValue();
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32),Res);
+
+ } else
+ return SDValue();
+ }
if (ShiftAmt != 0)
- Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
+static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
+ if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
+ return false;
+
+ SDValue FalseVal = N.getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
+ if (!C)
+ return false;
+ if (AllOnes)
+ return C->isAllOnesValue();
+ return C->isNullValue();
+}
+
+/// formConditionalOp - Combine an operation with a conditional move operand
+/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
+/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
+static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
+ bool Commutable) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ bool isAND = N->getOpcode() == ISD::AND;
+ bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
+ if (!isCand && Commutable) {
+ isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
+ if (isCand)
+ std::swap(N0, N1);
+ }
+ if (!isCand)
+ return SDValue();
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ISD::AND: Opc = ARMISD::CAND; break;
+ case ISD::OR: Opc = ARMISD::COR; break;
+ case ISD::XOR: Opc = ARMISD::CXOR; break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
+ N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
+ N1.getOperand(4));
+}
+
static SDValue PerformANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
@@ -6697,6 +7045,13 @@ static SDValue PerformANDCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (and x, (cmov -1, y, cond)) => (and.cond x, y)
+ SDValue CAND = formConditionalOp(N, DAG, true);
+ if (CAND.getNode())
+ return CAND;
+ }
+
return SDValue();
}
@@ -6733,6 +7088,13 @@ static SDValue PerformORCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (or x, (cmov 0, y, cond)) => (or.cond x, y)
+ SDValue COR = formConditionalOp(N, DAG, true);
+ if (COR.getNode())
+ return COR;
+ }
+
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
@@ -6881,6 +7243,25 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformXORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (!Subtarget->isThumb1Only()) {
+ // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
+ SDValue CXOR = formConditionalOp(N, DAG, true);
+ if (CXOR.getNode())
+ return CXOR;
+ }
+
+ return SDValue();
+}
+
/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
@@ -6926,13 +7307,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(),
+ LD->isNonTemporal(), LD->isInvariant(),
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -6967,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile())
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ DebugLoc DL = St->getDebugLoc();
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+ Chains.size());
+ }
+
+ if (!ISD::isNormalStore(St))
return SDValue();
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = St->getDebugLoc();
SDValue BasePtr = St->getBasePtr();
@@ -6996,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N,
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
DebugLoc dl = StVal.getDebugLoc();
SDValue IntVec = StVal.getOperand(0);
@@ -7177,7 +7645,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
- default: assert(0 && "unexpected intrinsic for Neon base update");
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
@@ -7210,7 +7678,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
} else {
isLaneOp = true;
switch (N->getOpcode()) {
- default: assert(0 && "unexpected opcode for Neon base update");
+ default: llvm_unreachable("unexpected opcode for Neon base update");
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
@@ -7703,6 +8171,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+ // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+ // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
+ SDValue N1 = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue N0 = N->getOperand(0);
+ if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+ DAG.MaskedValueIsZero(N0.getOperand(0),
+ APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7824,7 +8304,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7846,7 +8326,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
@@ -7906,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -7931,7 +8410,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
- case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -8001,6 +8481,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
}
}
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if (IsZeroVal &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+ return MVT::v4i32;
+ } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+ return MVT::v2i32;
+ }
+ }
+
+ // Lowering to i32/i16 if the size permits.
+ if (Size >= 4) {
+ return MVT::i32;
+ } else if (Size >= 2) {
+ return MVT::i16;
+ }
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -8188,7 +8703,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
- break;
}
return true;
}
@@ -8198,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(Imm) != -1;
+ return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
@@ -8388,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
}
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
- KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5da9b27fca68..352d98001ddc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
+#include "ARM.h"
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -56,7 +57,11 @@ namespace llvm {
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
+
CMOV, // ARM conditional move instructions.
+ CAND, // ARM conditional and instructions.
+ COR, // ARM conditional or instructions.
+ CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -81,7 +86,6 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -146,6 +150,9 @@ namespace llvm {
VMOVIMM,
VMVNIMM,
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
// Vector duplicate:
VDUP,
VDUPLANE,
@@ -266,9 +273,14 @@ namespace llvm {
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type.
- /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -303,7 +315,6 @@ namespace llvm {
SelectionDAG &DAG) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -338,7 +349,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -402,7 +413,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -424,6 +434,8 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
@@ -452,7 +464,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -481,7 +493,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -512,9 +524,6 @@ namespace llvm {
bool signExtend,
ARMCC::CondCodes Cond) const;
- void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7cbc9111dec3..1d38bcf9e843 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,4 +1,4 @@
-//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : AsmOperandClass {
+def SetEndAsmOperand : ImmAsmOperand {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -274,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
class Encoding {
field bits<32> Inst;
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
}
class InstARM<AddrMode am, int sz, IndexMode im,
@@ -290,6 +314,32 @@ class InstThumb<AddrMode am, int sz, IndexMode im,
let DecoderNamespace = "Thumb";
}
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<string asm, dag iops>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "", NoItinerary> {
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let isCodeGenOnly = 0; // So we get asm matcher for it.
+ let AsmString = asm;
+ let isPseudo = 1;
+}
+
+class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsARM]>;
+class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb]>;
+class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb2]>;
+class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasVFP2]>;
+class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasNEON]>;
+
+// Pseudo instructions for the code generator.
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
: InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
GenericDomain, "", itin> {
@@ -481,6 +531,8 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Inst{15-12} = Rt;
let Inst{11-4} = 0b00001001;
let Inst{3-0} = Rt2;
+
+ let DecoderMethod = "DecodeSwap";
}
// addrmode1 instructions
@@ -792,7 +844,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : AsmOperandClass {
+def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1550,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
: AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ bits<5> fbits;
// size (fixed-point number): sx == 0 ? 16 : 32
let Inst{7} = op5; // sx
+ let Inst{5} = fbits{0};
+ let Inst{3-0} = fbits{4-1};
}
// VFP conversion instructions, if no NEON
@@ -1963,3 +2018,54 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasNEON,UseNEONForFP];
}
+
+// VFP/NEON Instruction aliases for type suffices.
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+ let Predicates = [HasNEON] in {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 48da03f63bb9..b8f607eb4c55 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,12 +21,29 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ if (hasNOP()) {
+ NopInst.setOpcode(ARM::NOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ } else {
+ NopInst.setOpcode(ARM::MOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ }
+}
+
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index f2c7bdc31be9..5d3e059b7038 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,10 @@
#ifndef ARMINSTRUCTIONINFO_H
#define ARMINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
class ARMSubtarget;
@@ -28,6 +27,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo {
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2cf0f09ffc64..3caaa2366123 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
@@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
- SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
@@ -184,6 +179,8 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
@@ -211,16 +208,20 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"ModeNaCl">;
+def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
+def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed.
+def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -244,25 +245,28 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
-def so_imm_not :
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use so_imm.
+def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM>;
+ }], so_imm_not_XFORM> {
+ let ParserMatchClass = so_imm_not_asmoperand;
+}
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
@@ -279,14 +283,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 65536;
-}]> {
- let ParserMatchClass = Imm0_65535AsmOperand;
-}
-
class BinOpWithFlagFrag<dag res> :
PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
@@ -321,6 +317,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Operand Definitions.
//
+// Immediate operands with a shared generic asm render method.
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -352,13 +351,11 @@ def bltarget : Operand<i32> {
// Call target for ARM. Handles conditional/unconditional
// FIXME: rename bl_target to t2_bltarget?
def bl_target : Operand<i32> {
- // Encoded the same as branch targets.
- let EncoderMethod = "getARMBranchTargetOpValue";
+ let EncoderMethod = "getARMBLTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
def blx_target : Operand<i32> {
- // Encoded the same as branch targets.
let EncoderMethod = "getARMBLXTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
@@ -475,6 +472,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegRegOpValue";
let PrintMethod = "printSORegRegOperand";
let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
@@ -485,13 +483,14 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegImmOpValue";
let PrintMethod = "printSORegImmOperand";
let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
-def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
+def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
@@ -515,16 +514,60 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -532,33 +575,57 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
let ParserMatchClass = Imm0_31AsmOperand;
}
+/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
+def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
+}]> {
+ let ParserMatchClass = Imm0_32AsmOperand;
+}
+
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
+/// imm0_65535 - An immediate is in the range [0.65535].
+def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -572,6 +639,7 @@ def BitfieldAsmOperand : AsmOperandClass {
let Name = "Bitfield";
let ParserMethod = "parseBitfield";
}
+
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -670,7 +738,7 @@ def postidx_reg : Operand<i32> {
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
let ParserMatchClass = PostIdxRegAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -699,7 +767,7 @@ def am2offset_reg : Operand<i32>,
let PrintMethod = "printAddrMode2OffsetOperand";
// When using this for assembly, it's always as a post-index offset.
let ParserMatchClass = PostIdxRegShiftedAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
@@ -711,7 +779,7 @@ def am2offset_imm : Operand<i32>,
let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
let ParserMatchClass = AM2OffsetImmAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -799,6 +867,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -1041,69 +1112,58 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
-/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default.
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
///
/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
-
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]>;
-
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
-
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-8} = shift{11-8};
- let Inst{7} = 0;
- let Inst{6-5} = shift{6-5};
- let Inst{4} = 1;
- let Inst{3-0} = shift{3-0};
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, iir,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ let isCommutable = Commutable;
}
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_imm:$shift))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_reg:$shift))]>;
}
}
-/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
+ GPR:$Rn))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
+ GPR:$Rn))]>;
}
}
@@ -1272,10 +1332,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{4} = 0;
let Inst{3-0} = shift{3-0};
}
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift),
+ def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -1309,7 +1369,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn,
so_reg_reg:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
@@ -1550,7 +1610,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These psuedos use a hand-written selection code).
+// (These pseudos use a hand-written selection code).
let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
@@ -1652,7 +1712,7 @@ class CPS<dag iops, string asm_ops>
let Inst{27-20} = 0b00010000;
let Inst{19-18} = imod;
let Inst{17} = M; // Enabled if mode is set;
- let Inst{16} = 0;
+ let Inst{16-9} = 0b00000000;
let Inst{8-6} = iflags;
let Inst{5} = 0;
let Inst{4-0} = mode;
@@ -1839,20 +1899,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
-// All calls clobber the non-callee saved registers. SP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
// FIXME: Do we really need a non-predicated version? If so, it should
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
@@ -1862,7 +1919,7 @@ let isCall = 1,
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
bits<24> func;
let Inst{23-0} = func;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -1872,7 +1929,7 @@ let isCall = 1,
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
@@ -1881,7 +1938,7 @@ let isCall = 1,
def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
@@ -1891,55 +1948,19 @@ let isCall = 1,
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsNotDarwin]>;
+ Requires<[IsARM, HasV4T]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
- Requires<[IsARM, IsDarwin]>;
-
- def BLr9_pred : ARMPseudoExpand<(outs),
- (ins bl_target:$func, pred:$p, variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred tglobaladdr:$func)],
- (BL_pred bl_target:$func, pred:$p)>,
- Requires<[IsARM, IsDarwin]>;
-
- // ARMv5T and above
- def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall GPR:$func)],
- (BLX GPR:$func)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
-
- def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred GPR:$func)],
- (BLX_pred GPR:$func, pred:$p)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
+ Requires<[IsARM, NoV4T]>;
- // ARMv4T
- // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsDarwin]>;
-
- // ARMv4
- def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsDarwin]>;
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def BMOVPCB_CALL : ARMPseudoInst<(outs),
+ (ins bl_target:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsARM]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -2006,47 +2027,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsDarwin]>;
-
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsDarwin]>;
-
- }
-
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>;
- def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>;
- def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsNotDarwin]>;
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>;
- def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsNotDarwin]>;
- }
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM]>;
}
// Secure Monitor Call is a system instruction.
@@ -2145,7 +2141,7 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
}
//===----------------------------------------------------------------------===//
-// Load / store Instructions.
+// Load / Store Instructions.
//
// Load
@@ -2197,9 +2193,10 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
}
// Indexed loads
-multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_ldridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2211,7 +2208,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2225,7 +2222,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2242,7 +2239,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2260,8 +2257,10 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
-defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether it the offset register is shifted.
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
}
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
@@ -2416,7 +2415,7 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{3-0} = offset{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackImm";
}
- def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
(ins addr_offset_none:$addr, postidx_reg:$Rm),
IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
"\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
@@ -2424,8 +2423,10 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{23} = Rm{4};
let Inst{22} = 0;
let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
let Inst{3-0} = Rm{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ let DecoderMethod = "DecodeLDR";
}
}
@@ -2451,10 +2452,11 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
}
// Indexed stores
-multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_stridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
- StFrm, itin,
+ StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2467,7 +2469,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, ldst_so_reg:$addr),
- IndexModePre, StFrm, itin,
+ IndexModePre, StFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2480,7 +2482,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2497,7 +2499,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2514,8 +2516,10 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayStore = 1, neverHasSideEffects = 1 in {
-defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
-defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether it the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
}
def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
@@ -2745,23 +2749,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here. Without it is the cannonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2770,16 +2776,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2788,16 +2796,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2806,16 +2816,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2826,10 +2838,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2843,6 +2857,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -2860,7 +2884,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
// A version for the smaller set of tail call registers.
@@ -3080,20 +3104,18 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// ADD and SUB with 's' bit set.
//
-// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
-// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
// AdjustInstrPostInstrSelection where we determine whether or not to
// set the "s" bit based on CPSR liveness.
//
-// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm ADDS : AsI1_bin_s_irs<0b0100, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3108,9 +3130,8 @@ defm RSB : AsI1_rbin_irs <0b0011, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3153,6 +3174,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
}
// Saturating add/subtract
@@ -3445,19 +3468,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// property. Remove them when it's possible to add those properties
// on an individual MachineInstr, not just an instuction description.
let isCommutable = 1 in {
-def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
+ let Unpredictable{15-12} = 0b1111;
}
let Constraints = "@earlyclobber $Rd" in
-def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
pred:$p, cc_out:$s),
4, IIC_iMUL32,
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
- (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
}
@@ -3952,10 +3976,13 @@ def BCCZi64 : PseudoInst<(outs),
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg_imm:$shift, pred:$p),
4, IIC_iCMOVsr,
@@ -3996,8 +4023,44 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
+// Conditional instructions
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+ Instruction irsr,
+ InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis> {
+ def ri : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rr : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+
} // neverHasSideEffects
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -4076,10 +4139,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -4106,10 +4169,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -4136,10 +4199,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -4185,14 +4248,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
(ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+}
+
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
Requires<[IsARM, HasV7]> {
@@ -4451,10 +4514,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4488,10 +4557,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
@@ -4635,7 +4710,8 @@ let isCall = 1,
// no encoding information is necessary.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
@@ -4644,31 +4720,37 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsDarwin]>;
+ Requires<[IsARM, IsIOS]>;
}
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// eh.sjlj.dispatchsetup pseudo-instructions.
+// These pseudos are used for both ARM and Thumb2. Any differences are
// handled when the pseudo is expanded (which happens before any passes
// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
- [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
- Requires<[IsDarwin]>;
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4725,30 +4807,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+// Tail calls. These patterns also apply to Thumb mode.
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>;
+def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
+def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
// Direct calls
-def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
- Requires<[IsARM, IsNotDarwin]>;
-def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
- Requires<[IsARM, IsDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+def : ARMPat<(ARMcall_nolink texternalsym:$func),
+ (BMOVPCB_CALL texternalsym:$func)>;
// zextload i1 -> zextload i8
def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
@@ -4992,13 +5059,113 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">;
// USAX == USUBADDX
def : MnemonicAlias<"usubaddx", "usax">;
-// LDRSBT/LDRHT/LDRSHT post-index offset if optional.
-// Note that the write-back output register is a dummy operand for MC (it's
-// only meaningful for codegen), so we just pass zero here.
-// FIXME: tblgen not cooperating with argument conversions.
-//def : InstAlias<"ldrsbt${p} $Rt, $addr",
-// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>;
-//def : InstAlias<"ldrht${p} $Rt, $addr",
-// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
-//def : InstAlias<"ldrsht${p} $Rt, $addr",
-// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+ (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+
+// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
+// LSR, ROR, and RRX instructions.
+// FIXME: We need C++ parser hooks to map the alias to the MOV
+// encoding. It seems we should be able to do that sort of thing
+// in tblgen, but it could get ugly.
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+// shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+ (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+ (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+ (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+ (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+ (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+ (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+ (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+ (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+
+
+// 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need difference constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7aad18695bb3..c7219a60f6c3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,4 +1,4 @@
-//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,45 @@
//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
+def nImmSplatI8 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI8AsmOperand;
+}
+def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
+def nImmSplatI16 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI16AsmOperand;
+}
+def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
+def nImmSplatI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI32AsmOperand;
+}
+def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
+def nImmVMOVI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperand;
+}
+def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
+def nImmVMOVI32Neg : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32NegAsmOperand;
+}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
+def nImmSplatI64 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI64AsmOperand;
+}
+
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
@@ -40,6 +79,326 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
+def VecListOneDAsmOperand : AsmOperandClass {
+ let Name = "VecListOneD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
+ let ParserMatchClass = VecListOneDAsmOperand;
+}
+// Register list of two sequential D registers.
+def VecListDPairAsmOperand : AsmOperandClass {
+ let Name = "VecListDPair";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+ let ParserMatchClass = VecListDPairAsmOperand;
+}
+// Register list of three sequential D registers.
+def VecListThreeDAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+ let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+ let Name = "VecListFourD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+ let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpaced";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+ let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+ let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+ let ParserMatchClass = VecListFourQAsmOperand;
+}
+
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoSpacedAllLanes"> {
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeAllLanes"> {
+ let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeSpacedAllLanes"> {
+ let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+ let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q regs).
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+ "printVectorListFourSpacedAllLanes"> {
+ let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
+
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of two D registers with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two Q registers with half-word lane subscripting.
+def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
+// Register list of three D registers with byte lane subscripting.
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of three Q registers with half-word lane subscripting.
+def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -103,6 +462,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -164,30 +524,22 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
}]>;
//===----------------------------------------------------------------------===//
-// NEON operand definitions
-//===----------------------------------------------------------------------===//
-
-def nModImm : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
-}
-
-//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
- : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
- : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+ [(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -197,12 +549,31 @@ class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -215,17 +586,17 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1,
- "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
@@ -241,59 +612,82 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
-def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
-
// ...with address register writeback:
-class VLD1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
- "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
-def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
-def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
-def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
-
-def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
-def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
-def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
-def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
-
-def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
@@ -301,31 +695,40 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
-def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
-def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
-def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
-def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
- []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -333,91 +736,80 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
-def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$Rn), IIC_VLD2x2,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
-
-def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -601,12 +993,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
let Inst{7} = lane{0};
- let Inst{5} = Rn{4};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
@@ -776,7 +1167,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
@@ -787,7 +1178,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
@@ -802,7 +1193,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
- let Inst{4} = Rn{4};
+ let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
}
@@ -813,7 +1204,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -826,7 +1217,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -854,7 +1245,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -866,7 +1257,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -877,117 +1268,142 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
- let Pattern = [(set QPR:$dst,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
-def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
- (VLD1DUPq32Pseudo addrmode6:$addr)>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
-
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD2dup,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
-
-def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
-class VLD2DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD2DupInstruction";
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+ def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
-def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
-def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
-
-def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
-def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
-def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
@@ -1008,9 +1424,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
-def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
-def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
@@ -1026,9 +1442,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
-def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
-def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
-def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
@@ -1054,9 +1470,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
-def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
-def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
@@ -1073,9 +1489,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
-def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
-def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
@@ -1093,12 +1509,29 @@ class VSTQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+
class VSTQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
@@ -1108,16 +1541,15 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
- IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b1010,op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
- "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
@@ -1133,185 +1565,233 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
-def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
-
// ...with address register writeback:
-class VST1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
- "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
-def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
-def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
-def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
-
-def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
-def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
-def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
-def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
-def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
-
-def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1458,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
- : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
- IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
- let Rm = 0b1111;
- let DecoderMethod = "DecodeVST1LN";
-}
-class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
@@ -1482,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+ addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1507,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
+ (ins AdrMode:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- addrmode6:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
@@ -1524,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
- extractelt> {
+ extractelt, addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1585,10 +2057,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -1914,8 +2386,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
@@ -1924,8 +2396,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -1954,8 +2426,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -1965,8 +2437,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -1987,8 +2459,8 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
@@ -1998,8 +2470,8 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -2028,8 +2500,8 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2040,8 +2512,8 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2073,9 +2545,9 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2086,9 +2558,9 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2108,9 +2580,9 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2122,9 +2594,9 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2182,9 +2654,9 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2194,9 +2666,9 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2230,9 +2702,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2243,9 +2715,9 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2277,8 +2749,8 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
@@ -2286,8 +2758,8 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
@@ -2332,8 +2804,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2342,8 +2814,8 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2417,9 +2889,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2649,14 +3121,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3165,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
+ string baseOpc, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
@@ -3199,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
+
+ // Aliases for two-operand forms (source and dest regs the same).
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
}
// Neon Shift-Accumulate vector operations,
@@ -3321,15 +3817,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3418,7 +3914,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -3509,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -3567,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -3619,6 +4115,37 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
+ (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
+ (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
@@ -3741,7 +4268,7 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3750,7 +4277,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
}
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3759,7 +4286,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
}
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3768,7 +4295,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
}
def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3790,7 +4317,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(vnotq QPR:$Vm))))]>;
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3799,7 +4326,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
}
def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3808,7 +4335,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
}
def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3817,7 +4344,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
}
def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3842,28 +4369,28 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
@@ -3912,12 +4439,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
@@ -3926,12 +4453,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
@@ -4119,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
-defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+ NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+ NEONvshru>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -4129,18 +4658,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4154,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
-defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+ NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+ NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -4298,13 +4829,15 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
-// Vector Swap -- for disassembly only.
+// Vector Swap
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
- (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
// Vector Move Operations.
@@ -4318,89 +4851,98 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4442,24 +4984,24 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4627,6 +5169,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
// Vector Conversions.
@@ -4650,6 +5195,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4658,7 +5204,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
+let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4667,6 +5215,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -4759,34 +5308,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4795,17 +5344,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -4825,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
@@ -4835,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
@@ -4847,27 +5404,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
- "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+ (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBL2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def VTBL3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
@@ -4876,31 +5431,28 @@ def VTBL4Pseudo
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
- "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
[(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
- DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+ DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBX2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
- IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX3, "$orig = $dst", []>;
@@ -4950,9 +5502,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
@@ -5028,3 +5584,1448 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
+// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
+// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)),
+// qsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, SubRegIndex RegType> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
+// v2i16 -> v2i32 -> v2i64
+defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD4 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+
+// VLD4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMAX.
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMIN.
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VPADD.
+def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
+ (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
+ (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
+ (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
+ (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VSRA.
+ // Signed.
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+ // Unsigned.
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSRI.
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSLI.
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// VSWP allows, but does not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+
+// "vmov Rd, #-imm" can be handled via "vmvn".
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
+
+def : NEONMnemonicAlias<"vzipq", "vzip">;
+def : NEONMnemonicAlias<"vswpq", "vswp">;
+
+def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
+def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
+
+
+// Alias for loading floating point immediates that aren't representable
+// using the vmov.f32 encoding but the bitpattern is representable using
+// the .i32 encoding.
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index cedb54799db0..6335229d3c2a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> {
let ParserMatchClass = t_imm0_508s4_asmoperand;
let OperandType = "OPERAND_IMMEDIATE";
}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+ let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
// Define Thumb specific addressing modes.
@@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
+def : tInstAlias<"add${p} sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+
// Can optionally specify SP as a three operand instruction.
def : tInstAlias<"add${p} sp, sp, $imm",
(tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
@@ -387,6 +398,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
bits<4> Rm;
let Inst{6-3} = Rm;
let Inst{2-0} = 0b000;
+ let Unpredictable{2-0} = 0b111;
}
}
@@ -404,15 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// prevent stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsNotDarwin]> {
+ Requires<[IsThumb]> {
bits<22> func;
let Inst{26} = func{21};
let Inst{25-16} = func{20-11};
@@ -426,7 +436,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ Requires<[IsThumb, HasV5T]> {
bits<21> func;
let Inst{25-16} = func{20-11};
let Inst{13} = 1;
@@ -439,7 +449,7 @@ let isCall = 1,
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
@@ -450,38 +460,7 @@ let isCall = 1,
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- // Also used for Thumb2
- def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
- 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
- (tBL pred:$p, t_bltarget:$func)>,
- Requires<[IsThumb, IsDarwin]>;
-
- // ARMv5T and above, also used for Thumb2
- def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
- 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
- (tBLXi pred:$p, t_blxtarget:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // Also used for Thumb2
- def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
- 2, IIC_Br, [(ARMtcall GPR:$func)],
- (tBLXr pred:$p, GPR:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // ARMv4T
- def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+ Requires<[IsThumb, IsThumb1Only]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
@@ -523,28 +502,22 @@ let isBranch = 1, isTerminator = 1 in
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
- // on Darwin), so it's in ARMInstrThumb2.td.
+ // IOS versions.
+ let Uses = [SP] in {
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
}
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
+ // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
+ // on IOS), so it's in ARMInstrThumb2.td.
+ // Non-IOS version:
+ let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
(ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
- Requires<[IsThumb, IsNotDarwin]>;
- def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsNotDarwin]>;
+ Requires<[IsThumb, IsNotIOS]>;
}
}
@@ -652,7 +625,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
}
// Load tconstpool
-// FIXME: Use ldr.n to work around a Darwin assembler bug.
+// FIXME: Use ldr.n to work around a darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", ".n\t$Rt, $addr",
@@ -666,10 +639,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
}
// FIXME: Remove this entry when the above ldr.n workaround is fixed.
-// For disassembly use only.
-def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [/* disassembly only */]>,
+// For assembly/disassembly use only.
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr", []>,
T1Encoding<{0,1,0,0,1,?}> {
// A6.2 & A8.6.59
bits<3> Rt;
@@ -1131,9 +1103,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1259,19 +1228,24 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// preserve all of the callee-saved resgisters, which is exactly what we want.
// $val is a scratch register for our use.
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb, IsIOS]>;
+
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ isBarrier = 1 in
+def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1309,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Direct calls
def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
- Requires<[IsThumb, IsNotDarwin]>;
-def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
@@ -1434,3 +1402,16 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm",
+ (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>;
+def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
+ (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+def : tInstAlias<"asr${s}${p} $Rdm, $imm",
+ (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 05dcc8993969..e6fb9d5f01eb 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -76,26 +76,39 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use t2_so_imm.
+def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_not_XFORM>;
+}], t2_so_imm_not_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095 : Operand<i32>,
- ImmLeaf<i32, [{
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
-}]>;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
@@ -129,6 +142,12 @@ def t2ldrlabel : Operand<i32> {
let PrintMethod = "printT2LdrLabelOperand";
}
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+ // used for assembler pseudo instruction and maps to t2ldrlabel, so
+ // doesn't need encoder or print methods of its own.
+}
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
@@ -545,6 +564,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
@@ -556,6 +580,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
@@ -608,25 +636,48 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_imm:$imm))]>;
// register
- def rr : T2sThreeReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
+ 4, iir,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
// shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, iis,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<PatFrag opnode> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
+ GPRnopc:$Rn))]>;
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
+ GPRnopc:$Rn))]>;
}
}
@@ -735,26 +786,6 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
-/// version is not needed since this is only for codegen.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
- // shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
- // shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
-}
-}
-
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
@@ -930,7 +961,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let DecoderMethod = "DecodeT2LoadShift";
}
- // FIXME: Is the pci variant actually needed?
+ // pci variant is very similar to i12, but supports negative offsets
+ // from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
[(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
@@ -1315,14 +1347,16 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
+
+let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1343,15 +1377,16 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
}
+} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1398,7 +1433,6 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
}
-
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
@@ -1455,7 +1489,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
"$addr.base = $wb", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
-// data/instruction access. These are for disassembly only.
+// data/instruction access.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
@@ -1513,6 +1547,10 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadShift";
}
+ // FIXME: We should have a separate 'pci' variant here. As-is we represent
+ // it via the i12 variant, which it's related to, but that means we can
+ // represent negative immediates, which aren't legal for anything except
+ // the 'pci' case (Rn == 15).
}
defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
@@ -1689,6 +1727,8 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, zero_reg)>;
def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
pred:$p, CPSR)>;
def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
@@ -1837,11 +1877,9 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
let hasPostISelHook = 1 in {
@@ -1857,8 +1895,7 @@ defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -2840,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@@ -2883,7 +2922,7 @@ def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
let isMoveImm = 1 in
def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+ IIC_iCMOVi, "mvn", "\t$Rd, $imm",
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd"> {
@@ -2922,6 +2961,35 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
} // isCodeGenOnly = 1
+
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
+ // shifted imm
+ def ri : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // register
+ def rr : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // shifted register
+ def rs : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+} // T2I_bincc_irs
+
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3043,9 +3111,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{11-8} = Rd;
let Inst{7-0} = addr{7-0};
}
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
@@ -3054,6 +3120,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
Requires<[IsThumb2, HasV7]> {
@@ -3081,8 +3148,9 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3091,7 +3159,8 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3128,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{13} = target{17};
let Inst{21-16} = target{16-11};
let Inst{10-0} = target{10-0};
+ let DecoderMethod = "DecodeT2BInstruction";
}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
@@ -3195,19 +3265,32 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let DecoderMethod = "DecodeThumb2BCCInstruction";
}
-// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
// it goes here.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin version.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in
+ // IOS version.
+ let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
- Requires<[IsThumb2, IsDarwin]>;
+ Requires<[IsThumb2, IsIOS]>;
}
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def t2BMOVPCB_CALL : tPseudoInst<(outs),
+ (ins t_bltarget:$func, variable_ops),
+ 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsThumb]>;
+}
+
+// Direct calls
+def : T2Pat<(ARMcall_nolink texternalsym:$func),
+ (t2BMOVPCB_CALL texternalsym:$func)>,
+ Requires<[IsThumb]>;
+
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
@@ -3430,7 +3513,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo isntruction that combines movs + predicated rsbmi
+// Pseudo isntruction that combines movs + predicated rsbmi
// to implement integer ABS
let usesCustomInserter = 1, Defs = [CPSR] in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
@@ -3667,20 +3750,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3851,6 +3946,29 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
(t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
+ (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
@@ -3862,6 +3980,18 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
(t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
// Alias for compares without the ".w" optional width specifier.
def : t2InstAlias<"cmn${p} $Rn, $Rm",
@@ -3900,7 +4030,20 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr",
def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
-// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+
+// Alias for MVN with(out) the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
(t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
@@ -3921,6 +4064,30 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// STMDB/STMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
+ (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stmdb${p}.w $Rn!, $regs",
+ (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"ldmdb${p}.w $Rn, $regs",
+ (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs",
+ (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
@@ -4016,3 +4183,87 @@ def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+
+// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
+
+// LDR(literal) w/ alternate [pc, #imm] syntax.
+def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ // Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $addr",
+ (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
+ (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
+ (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p}.w $Rt, $addr",
+ (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
+ (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+
+def : t2InstAlias<"add${p} $Rd, pc, $imm",
+ (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e746cf20d032..3600b889d6f1 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+// The VCVT to/from fixed-point instructions encode the 'fbits' operand
+// (the number of fixed bits) differently than it appears in the assembly
+// source. It's encoded as "Size - fbits" where Size is the size of the
+// fixed-point representation (32 or 16) and fbits is the value appearing
+// in the assembly source, an integer in [0,16] or (0,32], depending on size.
+def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
+def fbits32 : Operand<i32> {
+ let PrintMethod = "printFBits32";
+ let ParserMatchClass = fbits32_asm_operand;
+}
+
+def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
+def fbits16 : Operand<i32> {
+ let PrintMethod = "printFBits16";
+ let ParserMatchClass = fbits16_asm_operand;
+}
//===----------------------------------------------------------------------===//
// Load / store Instructions.
@@ -69,11 +85,11 @@ def vfp_f64imm : Operand<f64>,
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
- IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ IIC_fpLoad64, "vldr", "\t$Dd, $addr",
[(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
- IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ IIC_fpLoad32, "vldr", "\t$Sd, $addr",
[(set SPR:$Sd, (load addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -83,11 +99,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
- IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ IIC_fpStore64, "vstr", "\t$Dd, $addr",
[(store (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
- IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ IIC_fpStore32, "vstr", "\t$Sd, $addr",
[(store SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -190,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -270,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -299,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
@@ -319,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -360,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -790,127 +814,131 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
// S32 (U=0, sx=1) -> SL
// U32 (U=1, sx=1) -> UL
-// FIXME: Marking these as codegen only seems wrong. They are real
-// instructions(?)
-let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
// Fixed-Point to FP:
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
-} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+} // End of 'let Constraints = "$a = $dst" in'
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
@@ -922,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -930,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -938,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -949,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -957,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -965,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -976,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -984,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -992,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1003,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1018,11 +1046,172 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
+ (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
+ (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
+def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma x, y, z)
+def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vnfms x, y, z)
+def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -1063,9 +1252,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
// to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1079,6 +1268,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, fpexc", []>;
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
}
//===----------------------------------------------------------------------===//
@@ -1160,6 +1353,115 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+// VLDR/VSTR accept an optional type suffix.
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMUL has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
+ (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
+ (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VADD has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
+ (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
+ (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VSUB has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
+ (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
+ (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+
+// VMOV can accept optional 32-bit or less data type suffix suffix.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 45b7e48d0cfb..98930ccdb194 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
@@ -61,7 +61,7 @@ extern "C" {
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
@@ -85,7 +85,7 @@ extern "C" {
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 2f9792813d32..792818442724 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -1,4 +1,4 @@
-//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index faa8ba76845e..9ef2ace29cff 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,8 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -32,6 +32,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -62,6 +64,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
RegScavenger *RS;
bool isThumb2;
@@ -90,7 +93,9 @@ namespace {
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs);
void MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &MemOps,
unsigned memOpsBegin,
@@ -141,7 +146,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::LDMDB;
case ARM_AM::ib: return ARM::LDMIB;
}
- break;
case ARM::STRi12:
++NumSTMGened;
switch (Mode) {
@@ -151,7 +155,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
- break;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@@ -160,7 +163,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2LDMIA;
case ARM_AM::db: return ARM::t2LDMDB;
}
- break;
case ARM::t2STRi8:
case ARM::t2STRi12:
++NumSTMGened;
@@ -169,7 +171,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2STMIA;
case ARM_AM::db: return ARM::t2STMDB;
}
- break;
case ARM::VLDRS:
++NumVLDMGened;
switch (Mode) {
@@ -177,7 +178,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMSIA;
case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
}
- break;
case ARM::VSTRS:
++NumVSTMGened;
switch (Mode) {
@@ -185,7 +185,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMSIA;
case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
}
- break;
case ARM::VLDRD:
++NumVLDMGened;
switch (Mode) {
@@ -193,7 +192,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMDIA;
case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
}
- break;
case ARM::VSTRD:
++NumVSTMGened;
switch (Mode) {
@@ -201,10 +199,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMDIA;
case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
}
- break;
}
-
- return 0;
}
namespace llvm {
@@ -259,8 +254,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::STMIB_UPD:
return ARM_AM::ib;
}
-
- return ARM_AM::bad_am_submode;
}
} // end namespace ARM_AM
@@ -291,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int Offset, unsigned Base, bool BaseKill,
int Opcode, ARMCC::CondCodes Pred,
unsigned PredReg, unsigned Scratch, DebugLoc dl,
- SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
@@ -359,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
+ // Add implicit defs for super-registers.
+ for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
+ MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
+
return true;
}
@@ -393,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
}
SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 8> ImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
// uses the same register, make sure to transfer any kill flag.
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
+
+ // Collect any implicit defs of super-registers. They must be preserved.
+ for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+ continue;
+ unsigned DefReg = MO->getReg();
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
+ ImpDefs.push_back(DefReg);
+ }
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs))
+ Pred, PredReg, Scratch, dl, Regs, ImpDefs))
return;
// Merge succeeded, update records.
@@ -506,50 +514,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
return;
}
-static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool definesCPSR(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2SUBri &&
- MI->getOpcode() != ARM::tSUBspi &&
- MI->getOpcode() != ARM::SUBri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2SUBri:
+ case ARM::SUBri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tSUBspi:
+ break;
+ }
// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
-static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2ADDri &&
- MI->getOpcode() != ARM::tADDspi &&
- MI->getOpcode() != ARM::ADDri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2ADDri:
+ case ARM::ADDri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tADDspi:
+ break;
+ }
if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
@@ -603,7 +645,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::LDMDA_UPD;
case ARM_AM::db: return ARM::LDMDB_UPD;
}
- break;
case ARM::STMIA:
case ARM::STMDA:
case ARM::STMDB:
@@ -615,7 +656,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::STMDA_UPD;
case ARM_AM::db: return ARM::STMDB_UPD;
}
- break;
case ARM::t2LDMIA:
case ARM::t2LDMDB:
switch (Mode) {
@@ -623,7 +663,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2LDMIA_UPD;
case ARM_AM::db: return ARM::t2LDMDB_UPD;
}
- break;
case ARM::t2STMIA:
case ARM::t2STMDB:
switch (Mode) {
@@ -631,38 +670,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2STMIA_UPD;
case ARM_AM::db: return ARM::t2STMDB_UPD;
}
- break;
case ARM::VLDMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMSIA_UPD;
case ARM_AM::db: return ARM::VLDMSDB_UPD;
}
- break;
case ARM::VLDMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMDIA_UPD;
case ARM_AM::db: return ARM::VLDMDDB_UPD;
}
- break;
case ARM::VSTMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMSIA_UPD;
case ARM_AM::db: return ARM::VSTMSDB_UPD;
}
- break;
case ARM::VSTMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMDIA_UPD;
case ARM_AM::db: return ARM::VSTMDDB_UPD;
}
- break;
}
-
- return 0;
}
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
@@ -686,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
bool BaseKill = MI->getOperand(0).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
@@ -783,7 +815,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_PRE;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
@@ -809,7 +840,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_POST;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
@@ -841,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
@@ -1071,11 +1101,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned Opcode = MI->getOpcode();
if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg();
unsigned OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
- if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
+ if (!Errata602117 &&
+ ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
return false;
MachineBasicBlock::iterator NewBBI = MBBI;
@@ -1087,15 +1123,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
bool OddDeadKill = isLd ?
MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
bool OddUndef = MI->getOperand(1).isUndef();
- const MachineOperand &BaseOp = MI->getOperand(2);
- unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill();
bool BaseUndef = BaseOp.isUndef();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
@@ -1126,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned NewOpc = (isLd)
? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and base register is killed, it may have been
// re-defed by the load, make sure the first load does not clobber it.
@@ -1133,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
(BaseKill || OffKill) &&
(TRI->regsOverlap(EvenReg, BaseReg))) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
+ if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
+ NewOpc = ARM::t2LDRi12;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1150,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenDeadKill = false;
OddDeadKill = true;
}
+ // Never kill the base register in the first instruction.
+ // <rdar://problem/11101911>
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
@@ -1206,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
@@ -1380,6 +1425,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
@@ -1464,19 +1510,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1498,6 +1543,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
+
+/// Copy Op0 and Op1 operands into a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ + (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd =
+ std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
+
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
@@ -1565,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = llvm::getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
@@ -1615,8 +1677,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOp = Op;
}
- unsigned Opcode = Op->getOpcode();
- if (LastOpcode && Opcode != LastOpcode)
+ unsigned LSMOpcode
+ = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
+ if (LastOpcode && LSMOpcode != LastOpcode)
break;
int Offset = getMemoryOpOffset(Op);
@@ -1627,7 +1690,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
LastOffset = Offset;
LastBytes = Bytes;
- LastOpcode = Opcode;
+ LastOpcode = LSMOpcode;
if (++NumMove == 8) // FIXME: Tune this limit.
break;
}
@@ -1692,6 +1755,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
@@ -1704,6 +1769,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
MBB->erase(Op0);
@@ -1745,8 +1812,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
@@ -1758,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
- if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
int Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index daa126def401..e2ac9a466ed8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
switch (MO.getTargetFlags()) {
- default:
- assert(0 && "Unknown target flag on symbol operand");
+ default: llvm_unreachable("Unknown target flag on symbol operand");
case 0:
break;
case ARMII::MO_LO16:
@@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) {
switch (MO.getType()) {
- default:
- assert(0 && "unknown operand type");
- return false;
+ default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all non-CPSR implicit register operands.
if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
@@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
break;
}
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return false;
}
return true;
}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..af445e2f35aa
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void ARMFunctionInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 138f0c262271..f1c8fc84816e 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private ARM-specific information for each MachineFunction.
class ARMFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
/// isThumb - True if this function is compiled under Thumb mode.
/// Used to initialized Align, so must precede it.
@@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPR callee-saved (2) : r8, r10, r11
/// --------------------------------------------
/// DPR callee-saved : d8 - d15
+ ///
+ /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
+ /// Some may be spilled after the stack has been realigned.
unsigned GPRCS1Offset;
unsigned GPRCS2Offset;
unsigned DPRCSOffset;
@@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
BitVector GPRCS2Frames;
BitVector DPRCSFrames;
+ /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
+ /// the aligned portion of the stack frame. This is always a contiguous
+ /// sequence of D-registers starting from d8.
+ ///
+ /// We do not keep track of the frame indices used for these registers - they
+ /// behave like any other frame index in the aligned stack frame. These
+ /// registers also aren't included in DPRCSSize above.
+ unsigned NumAlignedDPRCS2Regs;
+
/// JumpTableUId - Unique id for jumptables.
///
unsigned JumpTableUId;
@@ -104,6 +117,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false) {}
@@ -137,6 +151,9 @@ public:
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+ unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; }
+ void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; }
+
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 18e162000602..efa22fbed9f7 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1cba1ba591ef..6f3819afd0a5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
using namespace llvm;
+void ARMRegisterInfo::anchor() { }
+
ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8edfb9a2057f..8a248425c33c 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,15 @@
#define ARMREGISTERINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+ virtual void anchor();
public:
ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
};
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 036822d18ad2..1466e983f3be 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
field bits<4> Num;
let Namespace = "ARM";
let SubRegs = subregs;
+ // All bits of ARM registers with sub-registers are covered by sub-registers.
+ let CoveredBySubRegs = 1;
}
class ARMFReg<bits<6> num, string n> : Register<n> {
@@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
// Subregister indices.
let Namespace = "ARM" in {
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+
// Note: Code depends on these having consecutive numbers.
-def ssub_0 : SubRegIndex;
-def ssub_1 : SubRegIndex;
-def ssub_2 : SubRegIndex; // In a Q reg.
-def ssub_3 : SubRegIndex;
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
+def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
def dsub_0 : SubRegIndex;
def dsub_1 : SubRegIndex;
-def dsub_2 : SubRegIndex;
-def dsub_3 : SubRegIndex;
-def dsub_4 : SubRegIndex;
-def dsub_5 : SubRegIndex;
-def dsub_6 : SubRegIndex;
-def dsub_7 : SubRegIndex;
+def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
+def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
+def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
+def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
+def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
+def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
-def qsub_0 : SubRegIndex;
-def qsub_1 : SubRegIndex;
-def qsub_2 : SubRegIndex;
-def qsub_3 : SubRegIndex;
-
-def qqsub_0 : SubRegIndex;
-def qqsub_1 : SubRegIndex;
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
+def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+// Let TableGen synthesize the remaining 12 ssub_* indices.
+// We don't need to name them.
}
// Integer registers
@@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
// Advanced SIMD (NEON) defines 16 quad-word aliases
-let SubRegIndices = [dsub_0, dsub_1],
- CompositeIndices = [(ssub_2 dsub_1, ssub_0),
- (ssub_3 dsub_1, ssub_1)] in {
+let SubRegIndices = [dsub_0, dsub_1] in {
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
@@ -150,45 +152,22 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
}
-// Pseudo 256-bit registers to represent pairs of Q registers. These should
-// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g., vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technically the
-// starting D register number doesn't have to be multiple of 4, e.g.,
-// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
-// stuff very messy.
-let SubRegIndices = [qsub_0, qsub_1],
- CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
-def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
-def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
-def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
-def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
-def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
-def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
-def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
-def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
-}
-
-// Pseudo 512-bit registers to represent four consecutive Q registers.
-let SubRegIndices = [qqsub_0, qqsub_1],
- CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
- (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
- (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
-def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
-def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
-def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
-def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
-}
-
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
// Register classes.
@@ -261,6 +240,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
}];
}
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+
// Scalar single precision floating point register class..
def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
@@ -316,37 +301,100 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
(DPR_8 dsub_0, dsub_1)];
}
+// Pseudo-registers representing odd-even pairs of D registers. The even-odd
+// pairs are already represented by the Q registers.
+// These are needed by NEON instructions requiring two consecutive D registers.
+// There is no D31_D0 register as that is always an UNPREDICTABLE encoding.
+def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2)]>;
+
+// Register class representing a pair of consecutive D registers.
+// Use the Q registers for the even-odd pairs.
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
+ // Allocate starting at non-VFP2 registers D16-D31 first.
+ // Prefer even-odd pairs as they are easier to copy.
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Pseudo-registers representing 3 consecutive D registers.
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 1),
+ (shl DPR, 2)]>;
+
+// 3 consecutive D registers.
+def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
+
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
-def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQPR, 4)];
+ let AltOrders = [(rotl QQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Subset of QQPR that have 32-bit SPR subregs.
-def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
- (QPR_VFP2 qsub_0, qsub_1)];
+// Tuples of 4 D regs that isn't also a pair of Q regs.
+def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2),
+ (decimate (shl DPR, 3), 2),
+ (decimate (shl DPR, 4), 2)]>;
-}
+// 4 consecutive D registers.
+def DQuad : RegisterClass<"ARM", [v4i64], 256,
+ (interleave Tuples2Q, TuplesOE4D)>;
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
+ [(shl QQPR, 0), (shl QQPR, 2)]>;
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
-def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrders = [(rotl QQQQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Condition code registers.
-def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
- let CopyCost = -1; // Don't allow copying of status registers.
- let isAllocatable = 0;
+
+// Pseudo-registers representing 2-spaced consecutive D registers.
+def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 2)]>;
+
+// Spaced pairs of D registers.
+def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>;
+
+def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4)]>;
+
+// Spaced triples of D registers.
+def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> {
+ let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
}
+
+def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4),
+ (shl DPR, 6)]>;
+
+// Spaced quads of D registers.
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 86e7206f2cc6..21877fd9af37 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -1,4 +1,4 @@
-//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 958c5c647013..45486fd0b6dd 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -1,10 +1,10 @@
-//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
-//
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
@@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
def IIC_VMACD : InstrItinClass;
def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
def IIC_VRECSD : InstrItinClass;
def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8d86c01dc741..8b1fb9386ad5 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<20, [A8_NPipe], 0>,
@@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
//
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
// Double-register Reciprical Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 49fedf63f8bc..0d710cc1acee 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_NPipe]>],
[9, 1, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
@@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<4, [A9_NPipe]>],
[8, 4, 2, 1]>,
//
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+ // Quad-register Fused FP Multiple-Accumulate
+ // Result written in N9, but that is relative to the last cycle of multicycle,
+ // so we use 10 for those cases
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
// Double-register Reciprical Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index c1880a72fff3..0ace9bc1796d 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,10 +1,10 @@
-//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM v6 processors.
@@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries<
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a3a3d58841db..e2530d07e237 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -67,7 +67,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, 0);
+ false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -105,7 +105,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff),
+ false, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -144,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- // Use default for non AAPCS subtargets
- if (!Subtarget->isAAPCS_ABI())
+ // Use default for non AAPCS (or Darwin) subtargets
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin())
return SDValue();
const ARMTargetLowering &TLI =
@@ -188,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
0, // number of fixed arguments
TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
false, // is tail call
+ false, // does not return
false, // is return val used
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()), // callee
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 247d6be59ad4..e247b76ad43b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,6 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -47,13 +46,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasV7Ops(false)
, HasVFPv2(false)
, HasVFPv3(false)
+ , HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
, InThumbMode(false)
- , InNaClMode(false)
, HasThumb2(false)
, IsMClass(false)
, NoARM(false)
@@ -104,18 +103,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
computeIssueWidth();
if (TT.find("eabi") != std::string::npos)
+ // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
+ // Darwin-EABI conforms to AACPS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetDarwin())
+ if (!isTargetIOS())
UseMovt = hasV6T2Ops();
else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
- const Triple &T = getTargetTriple();
- SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
+ SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
if (!isThumb() || hasThumb2())
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index b63e1085fb83..e72b06fa3fcc 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -1,4 +1,4 @@
-//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -45,10 +45,11 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
- /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
- /// supported.
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
+ /// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
+ bool HasVFPv4;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -70,9 +71,6 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
- /// InNaClMode - True if targeting Native Client
- bool InNaClMode;
-
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
@@ -126,6 +124,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+ /// HasRAS - Some processors perform return stack prediction. CodeGen should
+ /// avoid issue "normal" call instructions to callees which do not return.
+ bool HasRAS;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension;
@@ -194,11 +196,13 @@ protected:
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool hasARMOps() const { return !NoARM; }
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
bool hasNEON() const { return HasNEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -212,6 +216,7 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
@@ -220,6 +225,7 @@ protected:
const Triple &getTargetTriple() const { return TargetTriple; }
+ bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 96b1e89b0df0..047efc23a4ea 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static cl::opt<bool>
@@ -33,24 +34,32 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
}
+
/// TargetMachine ctor - Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMTargetMachine::anchor() { }
+
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -68,10 +77,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"support ARM mode execution!");
}
+void ThumbTargetMachine::anchor() { }
+
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -94,37 +107,62 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
- PM.add(createARMGlobalMergePass(getTargetLowering()));
+namespace {
+/// ARM Code Generator Pass Configuration Options.
+class ARMPassConfig : public TargetPassConfig {
+public:
+ ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ ARMBaseTargetMachine &getARMTargetMachine() const {
+ return getTM<ARMBaseTargetMachine>();
+ }
+
+ const ARMSubtarget &getARMSubtarget() const {
+ return *getARMTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new ARMPassConfig(this, PM);
+}
+
+bool ARMPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
+ PM.add(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
-bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createARMISelDag(*this, OptLevel));
+bool ARMPassConfig::addInstSelector() {
+ PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
-bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
- if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
PM.add(createMLxExpansionPass());
return true;
}
-bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only()) {
PM.add(createARMLoadStoreOptimizationPass());
- if (Subtarget.hasNEON())
+ printAndVerify("After ARM load / store optimizer");
+ }
+ if (getARMSubtarget().hasNEON())
PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
@@ -132,27 +170,31 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
- PM.add(createIfConverterPass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only())
+ addPass(IfConverterID);
}
- if (Subtarget.isThumb2())
+ if (getARMSubtarget().isThumb2())
PM.add(createThumb2ITBlockPass());
return true;
}
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+bool ARMPassConfig::addPreEmitPass() {
+ if (getARMSubtarget().isThumb2()) {
+ if (!getARMSubtarget().prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ // Constant island pass work on unbundled instructions.
+ addPass(UnpackMachineBundlesID);
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c8c601c30171..abcdb24c0c69 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,9 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -50,18 +52,15 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- JITCodeEmitter &MCE);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
};
/// ARMTargetMachine - ARM target machine.
///
class ARMTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMELFWriterInfo ELFWriterInfo;
@@ -71,7 +70,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -100,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
/// Thumb-1 and Thumb-2.
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
@@ -111,7 +113,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1b5196..a5ea1c202e2c 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -24,8 +25,9 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
- if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ if (isAAPCS_ABI) {
StaticCtorSection =
getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
ELF::SHF_WRITE |
@@ -45,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ // Emit ctors in priority order.
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ // Emit dtors in priority order.
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439d7..ff210604148d 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,6 +20,7 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
+ bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -31,6 +32,9 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
+
+ const MCSection * getStaticCtorSection(unsigned Priority) const;
+ const MCSection * getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index 14d35ba54654..fda8536fcf6b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -17,9 +17,6 @@
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include <string>
@@ -107,11 +104,9 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() {
SetError(Lexer->getErrLoc(), Lexer->getErr());
break;
case AsmToken::Identifier: {
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
+ std::string lowerCase = lexedToken.getString().lower();
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lowerCase);
// Check for register aliases.
// r13 -> sp
// r14 -> lr
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 24f15b4694ff..e55a7dad45db 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -40,9 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ // Map of register aliases registers via the .req directive.
+ StringMap<unsigned> RegisterReqs;
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
@@ -91,9 +96,14 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveEabiAttr(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +171,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -197,10 +209,18 @@ class ARMAsmParser : public MCTargetAsmParser {
const SmallVectorImpl<MCParsedAsmOperand*> &);
bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- void processInstruction(MCInst &Inst,
+ bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool shouldOmitCCOutOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -217,6 +237,9 @@ public:
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ // Cache the MCRegisterInfo.
+ MRI = &getContext().getRegisterInfo();
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -251,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand {
k_CoprocReg,
k_CoprocOption,
k_Immediate,
- k_FPImmediate,
k_MemBarrierOpt,
k_Memory,
k_PostIndexRegister,
@@ -262,6 +284,9 @@ class ARMOperand : public MCParsedAsmOperand {
k_RegisterList,
k_DPRRegisterList,
k_SPRRegisterList,
+ k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -311,6 +336,14 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned RegNum;
} Reg;
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ } VectorList;
+
struct {
unsigned Val;
} VectorIndex;
@@ -319,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand {
const MCExpr *Val;
} Imm;
- struct {
- unsigned Val; // encoded 8-bit representation
- } FPImm;
-
/// Combined record for all forms of ARM address expressions.
struct {
unsigned BaseRegNum;
@@ -333,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand {
ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
unsigned ShiftImm; // shift for OffsetReg.
unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (8, 16, or 32)
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
} Memory;
@@ -393,6 +422,11 @@ public:
case k_SPRRegisterList:
Registers = o.Registers;
break;
+ case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
+ VectorList = o.VectorList;
+ break;
case k_CoprocNum:
case k_CoprocReg:
Cop = o.Cop;
@@ -403,9 +437,6 @@ public:
case k_Immediate:
Imm = o.Imm;
break;
- case k_FPImmediate:
- FPImm = o.FPImm;
- break;
case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
@@ -474,15 +505,10 @@ public:
}
const MCExpr *getImm() const {
- assert(Kind == k_Immediate && "Invalid access!");
+ assert(isImm() && "Invalid access!");
return Imm.Val;
}
- unsigned getFPImm() const {
- assert(Kind == k_FPImmediate && "Invalid access!");
- return FPImm.Val;
- }
-
unsigned getVectorIndex() const {
assert(Kind == k_VectorIndex && "Invalid access!");
return VectorIndex.Val;
@@ -511,90 +537,219 @@ public:
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
bool isImm() const { return Kind == k_Immediate; }
- bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isFPImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ return Val != -1;
+ }
+ bool isFBits16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 16;
+ }
+ bool isFBits32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
bool isImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
bool isImm0_1020s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
}
bool isImm0_508s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
}
+ bool isImm0_508s4Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ // explicitly exclude zero. we want that to use the normal 0_508 version.
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508;
+ }
bool isImm0_255() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_4095() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4096;
+ }
+ bool isImm0_4095Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ return Value > 0 && Value < 4096;
+ }
+ bool isImm0_1() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 8;
}
bool isImm0_15() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 16;
}
bool isImm0_31() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
bool isImm1_16() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 17;
}
bool isImm1_32() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
+ bool isImm0_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 33;
+ }
bool isImm0_65535() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
bool isImm0_65535Expr() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// If it's not a constant expression, it'll generate a fixup and be
// handled later.
@@ -603,56 +758,81 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value <= 0xffffff;
}
bool isImmThumbSR() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
bool isPKHLSLImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
bool isPKHASRImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
bool isARMSOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(Value) != -1;
}
+ bool isARMSOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isARMSOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isT2SOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(Value) != -1;
}
+ bool isT2SOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(~Value) != -1;
+ }
+ bool isT2SOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
@@ -672,7 +852,7 @@ public:
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
- return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+ return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false) const {
if (!isMemory())
@@ -681,6 +861,17 @@ public:
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
(alignOK || Memory.Alignment == 0);
}
+ bool isMemPCRelImm12() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base register must be PC.
+ if (Memory.BaseRegNum != ARM::PC)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
bool isAlignedMemory() const {
return isMemNoOffset(true);
}
@@ -694,8 +885,7 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAM2OffsetImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
// Immediate offset in range [-4095, 4095].
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -703,6 +893,11 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAddrMode3() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
@@ -726,6 +921,11 @@ public:
return (Val > -256 && Val < 256) || Val == INT32_MIN;
}
bool isAddrMode5() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
@@ -733,7 +933,7 @@ public:
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
- Val == INT32_MIN;
+ Val == INT32_MIN;
}
bool isMemTBB() const {
if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
@@ -810,6 +1010,11 @@ public:
return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
}
bool isMemImm8s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
@@ -828,6 +1033,8 @@ public:
bool isMemImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, 255].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
@@ -844,18 +1051,14 @@ public:
bool isMemNegImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
- // If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup. If it is a constant, it's something else
- // and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
- return true;
-
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
@@ -867,7 +1070,7 @@ public:
// If we have an immediate that's not a constant, treat it as a label
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -878,16 +1081,14 @@ public:
return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
}
bool isPostIdxImm8() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
return (Val > -256 && Val < 256) || (Val == INT32_MIN);
}
bool isPostIdxImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
@@ -898,6 +1099,188 @@ public:
bool isMSRMask() const { return Kind == k_MSRMask; }
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+ // NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPair() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListDPairSpaced() const {
+ if (isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPairAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListDPairSpacedAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListThreeDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListThreeQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListFourQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListThreeDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListFourDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
bool isVectorIndex8() const {
if (Kind != k_VectorIndex) return false;
return VectorIndex.Val < 8;
@@ -911,7 +1294,82 @@ public:
return VectorIndex.Val < 2;
}
+ bool isNEONi8splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i8 value splatted across 8 bytes. The immediate is just the 8 byte
+ // value.
+ return Value >= 0 && Value < 256;
+ }
+
+ bool isNEONi16splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i16 value in the range [0,255] or [0x0100, 0xff00]
+ return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ }
+
+ bool isNEONi32splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000);
+ }
+ bool isNEONi32vmov() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+ bool isNEONi32vmovNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = ~CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
+ // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+
+ bool isNEONi64splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ // i64 value with each byte being either 0 or 0xff.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
+ return true;
+ }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
@@ -967,7 +1425,8 @@ public:
void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ assert(isRegShiftedReg() &&
+ "addRegShiftedRegOperands() on non RegShiftedReg!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
Inst.addOperand(MCOperand::CreateImm(
@@ -976,7 +1435,8 @@ public:
void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ assert(isRegShiftedImm() &&
+ "addRegShiftedImmOperands() on non RegShiftedImm!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
Inst.addOperand(MCOperand::CreateImm(
ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
@@ -1026,9 +1486,23 @@ public:
addExpr(Inst, getImm());
}
+ void addFBits16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue()));
+ }
+
+ void addFBits32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue()));
+ }
+
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
void addImm8s4Operands(MCInst &Inst, unsigned N) const {
@@ -1047,32 +1521,20 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
- void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4)));
}
- void addImm0_255Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_15Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
@@ -1091,21 +1553,6 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
}
- void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm24bitOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate, except for 32, which encodes as
@@ -1115,11 +1562,6 @@ public:
Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
}
- void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// An ASR value of 32 encodes as 0, so that's how we want to add it to
@@ -1129,19 +1571,44 @@ public:
Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
}
- void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
- void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
- void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually an imm0_4095, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ }
+
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
@@ -1154,6 +1621,14 @@ public:
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
}
+ void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int32_t Imm = Memory.OffsetImm->getValue();
+ // FIXME: Handle #-0
+ if (Imm == INT32_MIN) Imm = 0;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -1196,6 +1671,16 @@ public:
void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
if (!Memory.OffsetRegNum) {
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1237,6 +1722,15 @@ public:
void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
// The lower two bits are always zero and as such are not encoded.
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1250,6 +1744,15 @@ public:
void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1281,7 +1784,7 @@ public:
void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1296,7 +1799,7 @@ public:
void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1322,8 +1825,9 @@ public:
void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
- Memory.ShiftImm, Memory.ShiftType);
+ unsigned Val =
+ ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1420,6 +1924,17 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
+
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
+ }
+
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
@@ -1435,6 +1950,80 @@ public:
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
+ void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ // Mask in that this is an i8 splat.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00));
+ }
+
+ void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = ~CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
+ Imm |= (Value & 1) << i;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00));
+ }
+
virtual void print(raw_ostream &OS) const;
static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
@@ -1579,6 +2168,43 @@ public:
return Op;
}
+ static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorList);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
MCContext &Ctx) {
ARMOperand *Op = new ARMOperand(k_VectorIndex);
@@ -1596,14 +2222,6 @@ public:
return Op;
}
- static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARMOperand *Op = new ARMOperand(k_FPImmediate);
- Op->FPImm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
static ARMOperand *CreateMem(unsigned BaseRegNum,
const MCConstantExpr *OffsetImm,
unsigned OffsetRegNum,
@@ -1668,10 +2286,6 @@ public:
void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case k_FPImmediate:
- OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
- << ") >";
- break;
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
@@ -1679,9 +2293,10 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ccout " << getReg() << ">";
break;
case k_ITCondMask: {
- static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
- "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
- "(tee)", "(eee)" };
+ static const char *MaskStr[] = {
+ "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
+ "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ };
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
break;
@@ -1735,18 +2350,15 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
case k_ShiftedRegister:
OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
- << ", " << RegShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
- << ">";
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
break;
case k_ShiftedImmediate:
OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
- << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
- << ">";
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -1770,6 +2382,18 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
+ case k_VectorList:
+ OS << "<vector_list " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -1788,7 +2412,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -1801,10 +2427,7 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1812,44 +2435,38 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase);
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
-#if 0
- // Also check for an index operand. This is only legal for vector registers,
- // but that'll get caught OK in operand matching, so we don't need to
- // explicitly filter everything else out here.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = Parser.getTok().getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
- SIdx, E,
- getContext()));
- }
-#endif
-
return RegNum;
}
@@ -1864,9 +2481,9 @@ int ARMAsmParser::tryParseShiftRegister(
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -1895,7 +2512,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -1919,6 +2537,10 @@ int ARMAsmParser::tryParseShiftRegister(
Error(ImmLoc, "immediate shift value out of range");
return -1;
}
+ // shift by zero is a nop. Always send it through as lsl.
+ // ('as' compatibility)
+ if (Imm == 0)
+ ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
@@ -1976,20 +2598,15 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
Parser.Lex(); // Eat right bracket token.
@@ -2008,7 +2625,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
switch (Name.size()) {
- default: break;
+ default: return -1;
case 2:
if (Name[0] != CoprocOp)
return -1;
@@ -2025,7 +2642,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '8': return 8;
case '9': return 9;
}
- break;
case 3:
if (Name[0] != CoprocOp || Name[1] != '1')
return -1;
@@ -2038,10 +2654,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '4': return 14;
case '5': return 15;
}
- break;
}
-
- return -1;
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
@@ -2161,7 +2774,7 @@ static unsigned getNextRegister(unsigned Reg) {
if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
return Reg + 1;
switch(Reg) {
- default: assert(0 && "Invalid GPR number!");
+ default: llvm_unreachable("Invalid GPR number!");
case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
@@ -2173,6 +2786,29 @@ static unsigned getNextRegister(unsigned Reg) {
}
}
+// Return the low-subreg of a given Q register.
+static unsigned getDRegFromQReg(unsigned QReg) {
+ switch (QReg) {
+ default: llvm_unreachable("expected a Q register!");
+ case ARM::Q0: return ARM::D0;
+ case ARM::Q1: return ARM::D2;
+ case ARM::Q2: return ARM::D4;
+ case ARM::Q3: return ARM::D6;
+ case ARM::Q4: return ARM::D8;
+ case ARM::Q5: return ARM::D10;
+ case ARM::Q6: return ARM::D12;
+ case ARM::Q7: return ARM::D14;
+ case ARM::Q8: return ARM::D16;
+ case ARM::Q9: return ARM::D18;
+ case ARM::Q10: return ARM::D20;
+ case ARM::Q11: return ARM::D22;
+ case ARM::Q12: return ARM::D24;
+ case ARM::Q13: return ARM::D26;
+ case ARM::Q14: return ARM::D28;
+ case ARM::Q15: return ARM::D30;
+ }
+}
+
/// Parse a register list.
bool ARMAsmParser::
parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2188,7 +2824,17 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Reg == -1)
return Error(RegLoc, "register expected");
- MCRegisterClass *RC;
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ ++Reg;
+ }
+ const MCRegisterClass *RC;
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
@@ -2198,10 +2844,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
else
return Error(RegLoc, "invalid register in register list");
- // The reglist instructions have at most 16 registers, so reserve
- // space for that many.
- SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
- // Store the first register.
+ // Store the register.
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
// This starts immediately after the first register token in the list,
@@ -2210,11 +2853,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
return Error(EndLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
// If the register is the same as the start reg, there's nothing
// more to do.
if (Reg == EndReg)
@@ -2236,15 +2882,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ bool isQReg = false;
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ isQReg = true;
+ }
// The register must be in the same register class as the first.
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
- return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here rather, as the
// VFP register classes have the enum sorted properly.
@@ -2252,6 +2914,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ if (isQReg)
+ Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
SMLoc E = Parser.getTok().getLoc();
@@ -2259,10 +2923,319 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assemble puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
+ const MCExpr *LaneIndex;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LaneIndex)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
+ if (!CE) {
+ Error(Loc, "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "']' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the ']'.
+ int64_t Val = CE->getValue();
+
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7) {
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ return MatchOperand_ParseFail;
+ }
+ Index = Val;
+ LaneKind = IndexedLane;
+ return MatchOperand_Success;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
+// parse a vector register list
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without encosing curly braces) as a single or double entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ SMLoc E = Parser.getTok().getLoc();
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Count = 1;
+ int Spacing = 0;
+ unsigned FirstReg = Reg;
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
+ ++Reg;
+ ++Count;
+ }
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the minus.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(EndLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(EndLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(EndLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // vector register lists must be contiguous.
+ // It's OK to use the enumeration values directly here rather, as the
+ // VFP register classes have the enum sorted properly.
+ //
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
+ Reg = getDRegFromQReg(Reg);
+ if (Reg != OldReg + 1) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Reg;
+ Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ continue;
+ }
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(E, "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '}' token.
+
+ switch (LaneKind) {
+ case NoLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ (Spacing == 2),
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex,
+ (Spacing == 2),
+ S, E));
+ break;
+ }
+ return MatchOperand_Success;
+}
+
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2337,7 +3310,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMClass()) {
// See ARMv6-M 10.1.1
- unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
.Case("apsr", 0)
.Case("iapsr", 1)
.Case("eapsr", 2)
@@ -2353,14 +3327,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
.Case("faultmask", 19)
.Case("control", 20)
.Default(~0U);
-
+
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
-
+
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
return MatchOperand_Success;
@@ -2369,7 +3343,7 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
size_t Start = 0, Next = Mask.find('_');
StringRef Flags = "";
- std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
+ std::string SpecReg = Mask.slice(Start, Next).lower();
if (Next != StringRef::npos)
Flags = Mask.slice(Next+1, Mask.size());
@@ -2392,7 +3366,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
FlagsVal = 8; // No flag
}
} else if (SpecReg == "cpsr" || SpecReg == "spsr") {
- if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ // cpsr_all is an alias for cpsr_fc, as is plain cpsr.
+ if (Flags == "all" || Flags == "")
Flags = "fc";
for (int i = 0, e = Flags.size(); i != e; ++i) {
unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
@@ -2411,9 +3386,13 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
} else // No match for special register.
return MatchOperand_NoMatch;
- // Special register without flags are equivalent to "fc" flags.
- if (!FlagsVal)
- FlagsVal = 0x9;
+ // Special register without flags is NOT equivalent to "fc" flags.
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following
+ // two lines would enable gas compatibility at the expense of breaking
+ // round-tripping.
+ //
+ // if (!FlagsVal)
+ // FlagsVal = 0x9;
// Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
if (SpecReg == "spsr")
@@ -2433,8 +3412,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
StringRef ShiftName = Tok.getString();
- std::string LowerOp = LowercaseString(Op);
- std::string UpperOp = UppercaseString(Op);
+ std::string LowerOp = Op.lower();
+ std::string UpperOp = Op.upper();
if (ShiftName != LowerOp && ShiftName != UpperOp) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
return MatchOperand_ParseFail;
@@ -2442,7 +3421,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2520,7 +3500,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2580,7 +3561,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2617,7 +3599,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2649,7 +3632,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2743,7 +3727,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3082,18 +4067,80 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
}
((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
- ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
- // If we have a three-operand form, use that, else the second source operand
- // is just the destination operand again.
- if (Operands.size() == 6)
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- else
- Inst.addOperand(Inst.getOperand(0));
+ // If we have a three-operand form, make sure to set Rn to be the operand
+ // that isn't the same as Rd.
+ unsigned RegOp = 4;
+ if (Operands.size() == 6 &&
+ ((ARMOperand*)Operands[4])->getReg() ==
+ ((ARMOperand*)Operands[3])->getReg())
+ RegOp = 5;
+ ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ Inst.addOperand(Inst.getOperand(0));
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
return true;
}
+bool ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
bool ARMAsmParser::
@@ -3153,7 +4200,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
unsigned Align = 0;
switch (CE->getValue()) {
default:
- return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ return Error(E,
+ "alignment specifier must be 16, 32, 64, 128, or 256 bits");
+ case 16: Align = 2; break;
+ case 32: Align = 4; break;
case 64: Align = 8; break;
case 128: Align = 16; break;
case 256: Align = 32; break;
@@ -3182,9 +4232,13 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// If we have a '#', it's an immediate offset, else assume it's a register
- // offset.
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat the '#'.
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -3281,7 +4335,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3301,7 +4356,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3328,10 +4384,36 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
/// parseFPImm - A floating point immediate expression operand.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Anything that can accept a floating point constant as an operand
+ // needs to go through here, as the regular ParseExpression is
+ // integer only.
+ //
+ // This routine still creates a generic Immediate operand, containing
+ // a bitcast of the 64-bit floating point value. The various operands
+ // that accept floats can check whether the value is valid for them
+ // via the standard is*() predicates.
+
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
+
+ // Disambiguate the VMOV forms that can accept an FP immediate.
+ // vmov.f32 <sreg>, #imm
+ // vmov.f64 <dreg>, #imm
+ // vmov.f32 <dreg>, #imm @ vector f32x2
+ // vmov.f32 <qreg>, #imm @ vector f32x4
+ //
+ // There are also the NEON VMOV instructions which expect an
+ // integer constant. Make sure we don't try to parse an FPImm
+ // for these:
+ // vmov.i{8|16|32|64} <dreg|qreg>, #imm
+ ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
+ if (!TyOp->isToken() || (TyOp->getToken() != ".f32" &&
+ TyOp->getToken() != ".f64"))
+ return MatchOperand_NoMatch;
+
Parser.Lex(); // Eat the '#'.
// Handle negation, as that still comes through as a separate token.
@@ -3341,34 +4423,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex();
}
const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
- int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ IntVal ^= (uint64_t)isNegative << 31;
Parser.Lex(); // Eat the token.
- if (Val == -1) {
- TokError("floating point value out of range");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(IntVal, getContext()),
+ S, Parser.getTok().getLoc()));
return MatchOperand_Success;
}
+ // Also handle plain integers. Instructions which allow floating point
+ // immediates also allow a raw encoded 8-bit value.
if (Tok.is(AsmToken::Integer)) {
int64_t Val = Tok.getIntVal();
Parser.Lex(); // Eat the token.
if (Val > 255 || Val < 0) {
- TokError("encoded floating point value out of range");
+ Error(Loc, "encoded floating point value out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ double RealVal = ARM_AM::getFPImmFloat(Val);
+ Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue();
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(Val, getContext()), S,
+ Parser.getTok().getLoc()));
return MatchOperand_Success;
}
- TokError("invalid floating point immediate");
+ Error(Loc, "invalid floating point immediate");
return MatchOperand_ParseFail;
}
+
/// Parse a arm instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -3391,7 +4478,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- // If this is VMRS, check for the apsr_nzcv operand.
if (!tryParseRegisterWithWriteBack(Operands))
return false;
int Res = tryParseShiftRegister(Operands);
@@ -3399,17 +4485,21 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
else if (Res == -1) // irrecoverable error
return true;
- if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
S = Parser.getTok().getLoc();
Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
return false;
}
// Fall though for the Identifier case that is not a register or a
// special name.
}
+ case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as a branch targets
+ case AsmToken::String: // quoted label names.
case AsmToken::Dot: { // . as a branch target
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
@@ -3425,6 +4515,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3435,13 +4526,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (getParser().ParseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!CE) {
- Error(S, "constant expression expected");
- return MatchOperand_ParseFail;
+ if (CE) {
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
}
- int32_t Val = CE->getValue();
- if (isNegative && Val == 0)
- ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
@@ -3524,7 +4613,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
- Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
+ Mnemonic == "fmuls")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -3565,7 +4655,11 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
+ Mnemonic == "vfms" || Mnemonic == "vfnms" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -3609,6 +4703,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic == "orr" || Mnemonic == "mvn" ||
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ Mnemonic == "vfm" || Mnemonic == "vfnm" ||
(!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
Mnemonic == "mla" || Mnemonic == "smlal" ||
Mnemonic == "umlal" || Mnemonic == "umull"))) {
@@ -3677,7 +4772,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
return true;
// For Thumb2, add/sub immediate does not have a cc_out operand for the
@@ -3694,9 +4789,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
//
// If either register is a high reg, it's either one of the SP
// variants (handled above) or a 32-bit encoding, so we just
- // check against T3.
+ // check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
return false;
// If both registers are low, we're in an IT block, and the immediate is
@@ -3726,6 +4823,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// remove the cc_out operand.
(!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
!inITBlock() ||
(static_cast<ARMOperand*>(Operands[3])->getReg() !=
static_cast<ARMOperand*>(Operands[5])->getReg() &&
@@ -3733,6 +4831,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->getReg())))
return true;
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock()))
+ return true;
+
// Register-register 'add/sub' for thumb does not have a cc_out operand
@@ -3744,15 +4856,52 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
(Operands.size() == 5 || Operands.size() == 6) &&
static_cast<ARMOperand*>(Operands[3])->isReg() &&
static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ (Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm())))
return true;
return false;
}
+static bool isDataTypeToken(StringRef Tok) {
+ return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
+ Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
+ Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" ||
+ Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" ||
+ Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" ||
+ Tok == ".f" || Tok == ".d";
+}
+
+// FIXME: This bit should probably be handled via an explicit match class
+// in the .td files that matches the suffix instead of having it be
+// a literal string token the way it is now.
+static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
+ return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+}
+
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemnonic may include suffices and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction."
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -3854,9 +5003,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Next = Name.find('.', Start + 1);
StringRef ExtraToken = Name.slice(Start, Next);
- // For now, we're only parsing Thumb1 (for the most part), so
- // just ignore ".n" qualifiers. We'll use them to restrict
- // matching when we do Thumb2.
+ // Some NEON instructions have an optional datatype suffix that is
+ // completely ignored. Check for that.
+ if (isDataTypeToken(ExtraToken) &&
+ doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
+ continue;
+
if (ExtraToken != ".n") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
@@ -3941,12 +5093,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -3990,9 +5151,9 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
// the ARMInsts array) instead. Getting that here requires awkward
// API changes, though. Better way?
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
-static MCInstrDesc &getInstDesc(unsigned Opcode) {
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return ARMInsts[Opcode];
}
@@ -4000,13 +5161,14 @@ static MCInstrDesc &getInstDesc(unsigned Opcode) {
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
// Check the IT block state first.
- // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
- // being allowed in IT blocks, but not being predicable. It just always
+ // NOTE: BKPT instruction has the interesting property of being
+ // allowed in IT blocks, but not being predicable. It just always
// executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
+ Inst.getOpcode() != ARM::BKPT) {
unsigned bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
@@ -4115,16 +5277,21 @@ validateInstruction(MCInst &Inst,
"in register list");
break;
}
+ // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2,
+ // so only issue a diagnostic for thumb1. The instructions will be
+ // switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
@@ -4141,10 +5308,1553 @@ validateInstruction(MCInst &Inst,
return false;
}
-void ARMAsmParser::
+static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+ case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+ // VST3LN
+ case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8;
+ case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+ case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+ case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+ case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
+ // VST3
+ case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8;
+ case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+ case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8;
+ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+ case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+ // VST4LN
+ case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8;
+ case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+ case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+ case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+ case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+ // VST4
+ case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8;
+ case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+ case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+ case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8;
+ case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+ case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32;
+ }
+}
+
+static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+ // VLD3DUP
+ case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8;
+ case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+ case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+ case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8;
+ case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+ case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+ // VLD3LN
+ case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8;
+ case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+ case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+ case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+ case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+ // VLD3
+ case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8;
+ case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+ case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8;
+ case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+ case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+ // VLD4LN
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8;
+ case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+ case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+ case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+ // VLD4DUP
+ case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8;
+ case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+ case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+ case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8;
+ case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+ case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+ // VLD4
+ case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8;
+ case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+ case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+ case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8;
+ case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+ case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
+ }
+}
+
+bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Aliases for alternate PC+imm syntax of LDR instructions.
+ case ARM::t2LDRpcrel:
+ Inst.setOpcode(ARM::t2LDRpci);
+ return true;
+ case ARM::t2LDRBpcrel:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ return true;
+ case ARM::t2LDRHpcrel:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ return true;
+ case ARM::t2LDRSBpcrel:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ return true;
+ case ARM::t2LDRSHpcrel:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ return true;
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8:
+ case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8:
+ case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_32:
+ case ARM::VST2LNqWB_register_Asm_16:
+ case ARM::VST2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_register_Asm_8:
+ case ARM::VST3LNdWB_register_Asm_16:
+ case ARM::VST3LNdWB_register_Asm_32:
+ case ARM::VST3LNqWB_register_Asm_16:
+ case ARM::VST3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_register_Asm_8:
+ case ARM::VST4LNdWB_register_Asm_16:
+ case ARM::VST4LNdWB_register_Asm_32:
+ case ARM::VST4LNqWB_register_Asm_16:
+ case ARM::VST4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdWB_fixed_Asm_8:
+ case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8:
+ case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_32:
+ case ARM::VST2LNqWB_fixed_Asm_16:
+ case ARM::VST2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_fixed_Asm_8:
+ case ARM::VST3LNdWB_fixed_Asm_16:
+ case ARM::VST3LNdWB_fixed_Asm_32:
+ case ARM::VST3LNqWB_fixed_Asm_16:
+ case ARM::VST3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_fixed_Asm_8:
+ case ARM::VST4LNdWB_fixed_Asm_16:
+ case ARM::VST4LNdWB_fixed_Asm_32:
+ case ARM::VST4LNqWB_fixed_Asm_16:
+ case ARM::VST4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdAsm_8:
+ case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8:
+ case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_32:
+ case ARM::VST2LNqAsm_16:
+ case ARM::VST2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdAsm_8:
+ case ARM::VST3LNdAsm_16:
+ case ARM::VST3LNdAsm_32:
+ case ARM::VST3LNqAsm_16:
+ case ARM::VST3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdAsm_8:
+ case ARM::VST4LNdAsm_16:
+ case ARM::VST4LNdAsm_32:
+ case ARM::VST4LNqAsm_16:
+ case ARM::VST4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8:
+ case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8:
+ case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_32:
+ case ARM::VLD2LNqWB_register_Asm_16:
+ case ARM::VLD2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_register_Asm_8:
+ case ARM::VLD3LNdWB_register_Asm_16:
+ case ARM::VLD3LNdWB_register_Asm_32:
+ case ARM::VLD3LNqWB_register_Asm_16:
+ case ARM::VLD3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_register_Asm_8:
+ case ARM::VLD4LNdWB_register_Asm_16:
+ case ARM::VLD4LNdWB_register_Asm_32:
+ case ARM::VLD4LNqWB_register_Asm_16:
+ case ARM::VLD4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8:
+ case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8:
+ case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_32:
+ case ARM::VLD2LNqWB_fixed_Asm_16:
+ case ARM::VLD2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_fixed_Asm_8:
+ case ARM::VLD3LNdWB_fixed_Asm_16:
+ case ARM::VLD3LNdWB_fixed_Asm_32:
+ case ARM::VLD3LNqWB_fixed_Asm_16:
+ case ARM::VLD3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_fixed_Asm_8:
+ case ARM::VLD4LNdWB_fixed_Asm_16:
+ case ARM::VLD4LNdWB_fixed_Asm_32:
+ case ARM::VLD4LNqWB_fixed_Asm_16:
+ case ARM::VLD4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8:
+ case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8:
+ case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_32:
+ case ARM::VLD2LNqAsm_16:
+ case ARM::VLD2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdAsm_8:
+ case ARM::VLD3LNdAsm_16:
+ case ARM::VLD3LNdAsm_32:
+ case ARM::VLD3LNqAsm_16:
+ case ARM::VLD3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdAsm_8:
+ case ARM::VLD4LNdAsm_16:
+ case ARM::VLD4LNdAsm_32:
+ case ARM::VLD4LNqAsm_16:
+ case ARM::VLD4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD3DUPdAsm_8:
+ case ARM::VLD3DUPdAsm_16:
+ case ARM::VLD3DUPdAsm_32:
+ case ARM::VLD3DUPqAsm_8:
+ case ARM::VLD3DUPqAsm_16:
+ case ARM::VLD3DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_fixed_Asm_8:
+ case ARM::VLD3DUPdWB_fixed_Asm_16:
+ case ARM::VLD3DUPdWB_fixed_Asm_32:
+ case ARM::VLD3DUPqWB_fixed_Asm_8:
+ case ARM::VLD3DUPqWB_fixed_Asm_16:
+ case ARM::VLD3DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_register_Asm_8:
+ case ARM::VLD3DUPdWB_register_Asm_16:
+ case ARM::VLD3DUPdWB_register_Asm_32:
+ case ARM::VLD3DUPqWB_register_Asm_8:
+ case ARM::VLD3DUPqWB_register_Asm_16:
+ case ARM::VLD3DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3 multiple 3-element structure instructions.
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8:
+ case ARM::VLD4DUPdAsm_16:
+ case ARM::VLD4DUPdAsm_32:
+ case ARM::VLD4DUPqAsm_8:
+ case ARM::VLD4DUPqAsm_16:
+ case ARM::VLD4DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_fixed_Asm_8:
+ case ARM::VLD4DUPdWB_fixed_Asm_16:
+ case ARM::VLD4DUPdWB_fixed_Asm_32:
+ case ARM::VLD4DUPqWB_fixed_Asm_8:
+ case ARM::VLD4DUPqWB_fixed_Asm_16:
+ case ARM::VLD4DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_register_Asm_8:
+ case ARM::VLD4DUPdWB_register_Asm_16:
+ case ARM::VLD4DUPdWB_register_Asm_32:
+ case ARM::VLD4DUPqWB_register_Asm_8:
+ case ARM::VLD4DUPqWB_register_Asm_16:
+ case ARM::VLD4DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4 multiple 4-element structure instructions.
+ case ARM::VLD4dAsm_8:
+ case ARM::VLD4dAsm_16:
+ case ARM::VLD4dAsm_32:
+ case ARM::VLD4qAsm_8:
+ case ARM::VLD4qAsm_16:
+ case ARM::VLD4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_fixed_Asm_8:
+ case ARM::VLD4dWB_fixed_Asm_16:
+ case ARM::VLD4dWB_fixed_Asm_32:
+ case ARM::VLD4qWB_fixed_Asm_8:
+ case ARM::VLD4qWB_fixed_Asm_16:
+ case ARM::VLD4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_register_Asm_8:
+ case ARM::VLD4dWB_register_Asm_16:
+ case ARM::VLD4dWB_register_Asm_32:
+ case ARM::VLD4qWB_register_Asm_8:
+ case ARM::VLD4qWB_register_Asm_16:
+ case ARM::VLD4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST3 multiple 3-element structure instructions.
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 3-element structure instructions.
+ case ARM::VST4dAsm_8:
+ case ARM::VST4dAsm_16:
+ case ARM::VST4dAsm_32:
+ case ARM::VST4qAsm_8:
+ case ARM::VST4qAsm_16:
+ case ARM::VST4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_fixed_Asm_8:
+ case ARM::VST4dWB_fixed_Asm_16:
+ case ARM::VST4dWB_fixed_Asm_32:
+ case ARM::VST4qWB_fixed_Asm_8:
+ case ARM::VST4qWB_fixed_Asm_16:
+ case ARM::VST4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_register_Asm_8:
+ case ARM::VST4dWB_register_Asm_16:
+ case ARM::VST4dWB_register_Asm_32:
+ case ARM::VST4qWB_register_Asm_8:
+ case ARM::VST4qWB_register_Asm_16:
+ case ARM::VST4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle encoding choice for the shift-immediate instructions.
+ case ARM::t2LSLri:
+ case ARM::t2LSRri:
+ case ARM::t2ASRri: {
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLri: NewOpc = ARM::tLSLri; break;
+ case ARM::t2LSRri: NewOpc = ARM::tLSRri; break;
+ case ARM::t2ASRri: NewOpc = ARM::tASRri; break;
+ }
+ // The Thumb1 operands aren't in the same order. Awesome, eh?
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Ammount == 32) Ammount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::CreateImm(Ammount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::ASRi:
+ case ARM::LSRi:
+ case ARM::LSLi:
+ case ARM::RORi: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRi: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLi: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORi: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
+ unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (Opc == ARM::MOVsi)
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDMIA_UPD: {
+ // If this is a load of a single register, then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDR_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STMDB_UPD: {
+ // If this is a store of a single register, then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
@@ -4160,6 +6870,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
case ARM::STMDB_UPD:
@@ -4178,41 +6889,117 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
break;
case ARM::tSUBi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and
+ // the flags are compatible with the current IT status, use encoding T2
+ // instead of T3. For compatibility with the system 'as'. Make sure the
+ // wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
Inst.setOpcode(ARM::tBcc);
+ return true;
+ }
break;
case ARM::t2B:
// A Thumb2 conditional branch outside of an IT block is a t2Bcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()){
Inst.setOpcode(ARM::t2Bcc);
+ return true;
+ }
break;
case ARM::t2Bcc:
// If the conditional is AL or we're in an IT block, we really want t2B.
- if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) {
Inst.setOpcode(ARM::t2B);
+ return true;
+ }
break;
case ARM::tBcc:
// If the conditional is AL, we really want tB.
- if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ if (Inst.getOperand(1).getImm() == ARMCC::AL) {
Inst.setOpcode(ARM::tB);
+ return true;
+ }
break;
case ARM::tLDMIA: {
// If the register list contains any high registers, or if the writeback
@@ -4235,6 +7022,7 @@ processInstruction(MCInst &Inst,
if (hasWritebackToken)
Inst.insert(Inst.begin(),
MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ return true;
}
break;
}
@@ -4248,14 +7036,40 @@ processInstruction(MCInst &Inst,
// 16-bit encoding isn't sufficient. Switch to the 32-bit version.
assert (isThumbTwo());
Inst.setOpcode(ARM::t2STMIA_UPD);
+ return true;
}
break;
}
+ case ARM::tPOP: {
+ bool listContainsBase;
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead if we're in Thumb2. Otherwise, this
+ // should have generated an error in validateInstruction().
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2LDMIA_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMDB_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
case ARM::t2MOVi: {
// If we can use the 16-bit encoding and the user didn't explicitly
// request the 32-bit variant, transform it here.
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(1).getImm() <= 255 &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255 &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR) ||
(inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
@@ -4270,6 +7084,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4290,6 +7105,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4320,9 +7136,61 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
+ return true;
}
break;
}
+ case ARM::MOVsi: {
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
+ // Shifting by zero is accepted as a vanilla 'MOVr'
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ANDrsi:
+ case ARM::ORRrsi:
+ case ARM::EORrsi:
+ case ARM::BICrsi:
+ case ARM::SUBrsi:
+ case ARM::ADDrsi: {
+ unsigned newOpc;
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ANDrsi: newOpc = ARM::ANDrr; break;
+ case ARM::ORRrsi: newOpc = ARM::ORRrr; break;
+ case ARM::EORrsi: newOpc = ARM::EORrr; break;
+ case ARM::BICrsi: newOpc = ARM::BICrr; break;
+ case ARM::SUBrsi: newOpc = ARM::SUBrr; break;
+ case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
+ }
+ // If the shift is by zero, use the non-shifted instruction definition.
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ITasm:
case ARM::t2IT: {
// The mask bits for all but the first condition are represented as
// the low bit of the condition code value implies 't'. We currently
@@ -4352,13 +7220,14 @@ processInstruction(MCInst &Inst,
break;
}
}
+ return false;
}
unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = getInstDesc(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -4417,14 +7286,23 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// Some instructions need post-processing to, for example, tweak which
- // encoding is selected.
- processInstruction(Inst, Operands);
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
// block.
forwardITPosition();
+ // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -4458,7 +7336,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// parseDirective parses the arm specific directives
@@ -4468,12 +7345,20 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
return true;
}
@@ -4509,9 +7394,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -4521,24 +7419,33 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
+ bool needFuncName = true;
- // Darwin asm has function name after .thumb_func direction
+ // Darwin asm has (optionally) function name after .thumb_func direction
// ELF doesn't
if (isMachO) {
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
- return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getString();
- Parser.Lex(); // Consume the identifier token.
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getIdentifier();
+ Parser.Lex(); // Consume the identifier token.
+ needFuncName = false;
+ }
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- Parser.Lex();
+
+ // Eat the end of statement and any blank lines that follow.
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex();
// FIXME: assuming function name will be the line following .thumb_func
- if (!isMachO) {
- Name = Parser.getTok().getString();
+ // We really should be checking the next symbol definition even if there's
+ // stuff in between.
+ if (needFuncName) {
+ Name = Parser.getTok().getIdentifier();
}
// Mark symbol as a thumb symbol.
@@ -4601,6 +7508,57 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUneq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ return true;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ return true;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39debc3..e24a1b17867a 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..f0184b675dac
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmParser
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC MCParser Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index f045e839a616..9a2aab5304ab 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,18 +1,18 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
-llvm_tablegen(ARMGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(ARMGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(ARMGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-llvm_tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
-llvm_tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(ARMGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(ARMGenFastISel.inc -gen-fast-isel)
-llvm_tablegen(ARMGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-llvm_tablegen(ARMGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
+tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
@@ -26,7 +26,6 @@ add_llvm_target(ARMCodeGen
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFrameLowering.cpp
- ARMGlobalMerge.cpp
ARMHazardRecognizer.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
@@ -34,6 +33,7 @@ add_llvm_target(ARMCodeGen
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
+ ARMMachineFunctionInfo.cpp
ARMRegisterInfo.cpp
ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
@@ -49,22 +49,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-# workaround for hanging compilation on MSVC10
-if( MSVC_VERSION EQUAL 1600 )
+# workaround for hanging compilation on MSVC9, 10
+if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
SOURCE ARMISelLowering.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8f2f813b676a..2f504b756b1b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,17 +9,16 @@
#define DEBUG_TYPE "arm-disassembler"
-#include "ARM.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,7 +51,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
};
@@ -77,7 +76,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
mutable std::vector<unsigned> ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -97,224 +96,236 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
Out = In;
return false;
}
- return false;
+ llvm_unreachable("Invalid DecodeStatus!");
}
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
unsigned Insn,
uint64_t Adddress,
const void *Decoder);
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-
-
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
@@ -327,11 +338,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-EDInstInfo *ARMDisassembler::getEDInfo() const {
+const EDInstInfo *ARMDisassembler::getEDInfo() const {
return instInfoARM;
}
-EDInstInfo *ThumbDisassembler::getEDInfo() const {
+const EDInstInfo *ThumbDisassembler::getEDInfo() const {
return instInfoARM;
}
@@ -415,7 +426,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
@@ -435,40 +446,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
- if (!getOpInfo)
- return false;
-
struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = Dis->getDisInfoBlock();
- if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- if (isBranch) {
- LLVMSymbolLookupCallback SymbolLookUp =
- Dis->getLLVMSymbolLookupCallback();
- if (SymbolLookUp) {
- uint64_t ReferenceType;
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = true;
- SymbolicOp.Value = 0;
- }
- else {
- SymbolicOp.Value = Value;
- }
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
- }
- else {
- return false;
- }
- }
- else {
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
}
+ // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
}
MCContext *Ctx = Dis->getMCContext();
@@ -527,8 +536,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- assert(0 && "bad SymbolicOp.VariantKind");
+ else
+ llvm_unreachable("bad SymbolicOp.VariantKind");
return true;
}
@@ -543,7 +552,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
- const void *Decoder) {
+ const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (SymbolLookUp) {
@@ -841,14 +850,14 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10, ARM::R11,
ARM::R12, ARM::SP, ARM::LR, ARM::PC
};
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -859,20 +868,26 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
}
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
switch (RegNo) {
@@ -902,13 +917,13 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -919,7 +934,7 @@ static const unsigned SPRDecoderTable[] = {
ARM::S28, ARM::S29, ARM::S30, ARM::S31
};
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -929,7 +944,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
@@ -940,7 +955,7 @@ static const unsigned DPRDecoderTable[] = {
ARM::D28, ARM::D29, ARM::D30, ARM::D31
};
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -950,7 +965,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -958,14 +973,14 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
@@ -973,7 +988,7 @@ static const unsigned QPRDecoderTable[] = {
};
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -984,7 +999,49 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static const uint16_t DPairDecoderTable[] = {
+ ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
+ ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
+ ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
+ ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
+ ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
+ ARM::Q15
+};
+
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairSpacedDecoderTable[] = {
+ ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
+ ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
+ ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
+ ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17,
+ ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21,
+ ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25,
+ ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29,
+ ARM::D28_D30, ARM::D29_D31
+};
+
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 29)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairSpacedDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
@@ -998,7 +1055,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val)
Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
@@ -1007,7 +1064,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
uint32_t imm = Val & 0xFF;
uint32_t rot = (Val & 0xF00) >> 7;
@@ -1016,7 +1073,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1053,7 +1110,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1088,7 +1145,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1123,7 +1180,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1140,7 +1197,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1157,7 +1214,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
// This operand encodes a mask of contiguous zeros between a specified MSB
// and LSB. To decode it, we create the mask of all bits MSB-and-lower,
@@ -1178,7 +1235,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1234,16 +1291,6 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
-
- bool writeback = (P == 0) || (W == 1);
- unsigned idx_mode = 0;
- if (P && writeback)
- idx_mode = ARMII::IndexModePre;
- else if (!P && writeback)
- idx_mode = ARMII::IndexModePost;
-
switch (Inst.getOpcode()) {
case ARM::t2LDC2_OFFSET:
case ARM::t2LDC2L_OFFSET:
@@ -1333,7 +1380,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1436,7 +1483,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1477,7 +1524,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1490,6 +1537,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
unsigned W = fieldFromInstruction32(Insn, 21, 1);
unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt2 = Rt + 1;
bool writeback = (W == 1) | (P == 0);
@@ -1501,7 +1549,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (Rt & 0x1) return MCDisassembler::Fail;
+ if (Rt & 0x1) S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+
+ if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && fieldFromInstruction32(Insn, 8, 4))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (type && Rn == 15){
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && Rn == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ if (type && Rn == 15){
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (type && (Rt == 15 || (writeback && Rn == Rt)))
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt == 15 || Rm == 15))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
break;
default:
break;
@@ -1588,7 +1715,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1617,7 +1744,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1702,7 +1829,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
return S;
}
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 18, 2);
unsigned M = fieldFromInstruction32(Insn, 17, 1);
@@ -1742,7 +1869,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 9, 2);
unsigned M = fieldFromInstruction32(Insn, 8, 1);
@@ -1782,7 +1909,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1806,7 +1933,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1832,7 +1959,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1860,7 +1987,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1880,7 +2007,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1899,13 +2026,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
}
static DecodeStatus
-DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
+ (fieldFromInstruction32(Insn, 11, 1) << 18) |
+ (fieldFromInstruction32(Insn, 13, 1) << 17) |
+ (fieldFromInstruction32(Insn, 16, 6) << 11) |
+ (fieldFromInstruction32(Insn, 26, 1) << 19);
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
+ return S;
+}
+
+static DecodeStatus
+DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1915,12 +2057,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
return S;
}
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
- 4, Inst, Decoder))
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1929,13 +2073,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1952,7 +2090,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1964,47 +2102,38 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
// First output register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second output register
switch (Inst.getOpcode()) {
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2020,12 +2149,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VLD2b8:
- case ARM::VLD2b16:
- case ARM::VLD2b32:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
case ARM::VLD3q8:
case ARM::VLD3q16:
case ARM::VLD3q32:
@@ -2046,28 +2169,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third output register
switch(Inst.getOpcode()) {
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2104,20 +2205,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth output register
switch (Inst.getOpcode()) {
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
@@ -2142,31 +2229,58 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback operand
switch (Inst.getOpcode()) {
- case ARM::VLD1d8_UPD:
- case ARM::VLD1d16_UPD:
- case ARM::VLD1d32_UPD:
- case ARM::VLD1d64_UPD:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d8Twb_register:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d16Twb_register:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d32Twb_register:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d8Qwb_register:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d16Qwb_register:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d32Qwb_register:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2191,17 +2305,66 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ default:
+ // The below have been updated to have explicit am6offset split
+ // between fixed and register offset. For those instructions not
+ // yet updated, we need to add an additional reg0 operand for the
+ // fixed variant.
+ //
+ // The fixed offset encodes as Rm == 0xd, so we check for that.
+ if (Rm == 0xd) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ break;
+ }
+ // Fall through to handle the register offset variant.
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ break;
}
return S;
}
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2214,31 +2377,60 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback Operand
switch (Inst.getOpcode()) {
- case ARM::VST1d8_UPD:
- case ARM::VST1d16_UPD:
- case ARM::VST1d32_UPD:
- case ARM::VST1d64_UPD:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1d8wb_register:
+ case ARM::VST1d16wb_register:
+ case ARM::VST1d32wb_register:
+ case ARM::VST1d64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
+ if (Rm == 0xF)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2263,55 +2455,89 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ default:
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ break;
}
+
// First input register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2327,12 +2553,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2354,28 +2574,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2412,20 +2610,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2451,7 +2635,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2461,15 +2645,21 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -2480,17 +2670,17 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2500,18 +2690,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size;
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
@@ -2523,10 +2728,13 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
return S;
}
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2561,7 +2769,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2614,7 +2822,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2659,7 +2867,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2678,31 +2886,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(8 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(16 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(32 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(64 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2713,7 +2921,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2722,9 +2929,15 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VTBL2:
+ case ARM::VTBX2:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2733,7 +2946,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2757,25 +2970,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2790,7 +3009,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2804,7 +3023,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned imm = Val << 2;
@@ -2814,7 +3033,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -2822,7 +3041,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2839,7 +3058,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2894,7 +3113,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
@@ -2903,7 +3122,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2918,7 +3137,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2933,7 +3152,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
return S;
}
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (Val == 0)
@@ -2946,7 +3165,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2977,7 +3196,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3007,7 +3226,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3022,7 +3241,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imm = fieldFromInstruction16(Insn, 0, 7);
@@ -3033,7 +3252,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3058,7 +3277,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction16(Insn, 0, 3);
@@ -3069,29 +3288,29 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned add = fieldFromInstruction32(Insn, 4, 1);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(add));
return S;
}
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address,
+ if (!tryAddingSymbolicOperand(Address,
(Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
@@ -3101,7 +3320,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3117,7 +3336,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3159,7 +3378,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
// Decode a shifted immediate operand. These basically consist
// of an 8-bit value, and a 4-bit directive that specifies either
// a splat operation or a rotation.
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
if (ctrl == 0) {
@@ -3191,19 +3410,23 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
switch (Val) {
default:
@@ -3223,14 +3446,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!Val) return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3253,7 +3476,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
@@ -3280,7 +3503,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3305,7 +3528,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3333,7 +3556,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3358,7 +3581,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3383,7 +3606,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3442,7 +3665,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3500,7 +3723,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3567,7 +3790,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3631,7 +3854,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3701,7 +3924,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3765,7 +3988,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3839,7 +4062,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3904,7 +4127,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3930,7 +4153,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3956,7 +4179,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction16(Insn, 4, 4);
@@ -3983,7 +4206,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4020,7 +4243,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4054,7 +4277,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
uint64_t Address, const void *Decoder) {
unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
@@ -4069,7 +4292,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
uint64_t Address,
const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4080,3 +4303,109 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
return S;
}
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv2f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv2f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv4f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv4f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+
+ if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751150e8..9de6e5c511bd 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..52d833893270
--- /dev/null
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDisassembler
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index ccdac3ebeb47..b3eeafe08314 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -18,11 +18,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
@@ -36,16 +36,14 @@ static unsigned translateShiftImm(unsigned imm) {
ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI) {
+ MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
-StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
@@ -101,7 +99,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.123 PUSH
if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2STMDB_UPD)
@@ -122,7 +122,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.122 POP
if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2LDMIA_UPD)
@@ -250,7 +252,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc == ARM_AM::rrx)
return;
-
+
O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
}
@@ -433,6 +435,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, O);
+ return;
+ }
+
const MCOperand &MO3 = MI->getOperand(Op+2);
unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
@@ -636,7 +644,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (getAvailableFeatures() & ARM::FeatureMClass) {
switch (Op.getImm()) {
- default: assert(0 && "Unexpected mask value!");
+ default: llvm_unreachable("Unexpected mask value!");
case 0: O << "apsr"; return;
case 1: O << "iapsr"; return;
case 2: O << "eapsr"; return;
@@ -659,12 +667,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: assert(0);
+ default: llvm_unreachable("Unexpected mask value!");
case 4: O << "g"; return;
case 8: O << "nzcvq"; return;
case 12: O << "nzcvqg"; return;
}
- llvm_unreachable("Unexpected mask value!");
}
if (SpecRegRBit)
@@ -684,7 +691,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
- if (CC != ARMCC::AL)
+ // Handle the undefined 15 CC value here for printing so we don't abort().
+ if ((unsigned)CC == 15)
+ O << "<und>";
+ else if (CC != ARMCC::AL)
O << ARMCondCodeToString(CC);
}
@@ -882,6 +892,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
O << "[" << getRegisterName(MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm() / 4;
@@ -963,7 +978,8 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x" << utohexstr(Val);
+ O << "#0x";
+ O.write_hex(Val);
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
@@ -986,7 +1002,153 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
}
}
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 16 - MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 32 - MI->getOperand(OpNum).getImm();
+}
+
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
+
+void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}";
+}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 5c2173fcde62..8acb7eef019b 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,12 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- static const char *getInstructionName(unsigned Opcode);
-
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -128,7 +126,33 @@ public:
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b4957ccde..e2d4819b4b4a 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..6f4fa365358c
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmPrinter
+parent = ARM
+required_libraries = MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
new file mode 100644
index 000000000000..fd4b3a33de1a
--- /dev/null
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -0,0 +1,35 @@
+;===- ./lib/Target/ARM/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = ARM
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = ARMCodeGen
+parent = ARM
+required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 9982fa68a578..62473b2bfdee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -1,4 +1,4 @@
-//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -43,7 +44,7 @@ namespace ARM_AM {
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return "asr";
case ARM_AM::lsl: return "lsl";
case ARM_AM::lsr: return "lsr";
@@ -54,7 +55,7 @@ namespace ARM_AM {
static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return 2;
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -72,7 +73,7 @@ namespace ARM_AM {
static inline const char *getAMSubModeStr(AMSubMode Mode) {
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::ia: return "ia";
case ARM_AM::ib: return "ib";
case ARM_AM::da: return "da";
@@ -569,7 +570,7 @@ namespace ARM_AM {
}
EltBits = 64;
} else {
- assert(false && "Unsupported NEON immediate");
+ llvm_unreachable("Unsupported NEON immediate");
}
return Val;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index c31c5e6b8452..d10bfc104a3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -11,17 +11,18 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -31,8 +32,8 @@ using namespace llvm;
namespace {
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
- ARMELFObjectWriter(Triple::OSType OSType)
- : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
};
@@ -60,15 +61,16 @@ public:
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
@@ -76,6 +78,9 @@ public:
{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -100,13 +105,50 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
- void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ void handleAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
default: break;
case MCAF_Code16:
@@ -124,21 +166,81 @@ public:
};
} // end anonymous namespace
-bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ case ARM::tLDRpciASM: return ARM::t2LDRpci;
+ case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
+ }
+}
+
+bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
-void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
+ case ARM::fixup_arm_thumb_bcc: {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp: {
+ // If the immediate is negative, greater than 1020, or not a multiple
+ // of four, the wide version of the instruction must be used.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 1020 || Offset < 0 || Offset & 3;
+ }
+ }
+ llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+}
+
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
-bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
- const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
: Thumb1_16bitNopEncoding;
@@ -269,6 +371,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
// These values don't encode the low two bits since they're always zero.
// Offset by 8 just as above.
return 0xffffff & ((Value - 8) >> 2);
@@ -359,6 +464,19 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10_unscaled: {
+ Value = Value - 8; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value = (Value & 0xf) | ((Value & 0xf0) << 4);
+ return Value | (isAdd << 23);
+ }
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
@@ -376,8 +494,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
assert ((Value < 256) && "Out of range pc-relative fixup value!");
Value |= isAdd << 23;
- // Same addressing mode as fixup_arm_pcrel_10,
- // but with 16-bit halfwords swapped.
+ // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
+ // swapped.
if (Kind == ARM::fixup_t2_pcrel_10) {
uint32_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -395,22 +513,21 @@ namespace {
// ELF is an ELF of course...
class ELFARMAsmBackend : public ARMAsmBackend {
public:
- Triple::OSType OSType;
+ uint8_t OSABI;
ELFARMAsmBackend(const Target &T, const StringRef TT,
- Triple::OSType _OSType)
- : ARMAsmBackend(T, TT), OSType(_OSType) { }
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
- /*IsLittleEndian*/ true);
+ return createARMELFObjectWriter(OS, OSABI);
}
};
// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = 4; // FIXME: 2 for Thumb
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -439,7 +556,7 @@ public:
Subtype);
}
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -464,9 +581,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_thumb_cb:
return 2;
+ case ARM::fixup_arm_pcrel_10_unscaled:
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
return 3;
@@ -491,7 +612,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -527,5 +648,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on ARM");
- return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ec4b6ffcfe83..ae11be888137 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -67,7 +67,6 @@ namespace ARMCC {
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
case ARMCC::EQ: return "eq";
case ARMCC::NE: return "ne";
case ARMCC::HS: return "hs";
@@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
case ARMCC::LE: return "le";
case ARMCC::AL: return "al";
}
+ llvm_unreachable("Unknown condition code");
}
namespace ARM_PROC {
@@ -185,6 +185,39 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S29: case D29: return 29;
case S30: case D30: return 30;
case S31: case D31: return 31;
+
+ // Composite registers use the regnum of the first register in the list.
+ /* Q0 */ case D0_D2: return 0;
+ case D1_D2: case D1_D3: return 1;
+ /* Q1 */ case D2_D4: return 2;
+ case D3_D4: case D3_D5: return 3;
+ /* Q2 */ case D4_D6: return 4;
+ case D5_D6: case D5_D7: return 5;
+ /* Q3 */ case D6_D8: return 6;
+ case D7_D8: case D7_D9: return 7;
+ /* Q4 */ case D8_D10: return 8;
+ case D9_D10: case D9_D11: return 9;
+ /* Q5 */ case D10_D12: return 10;
+ case D11_D12: case D11_D13: return 11;
+ /* Q6 */ case D12_D14: return 12;
+ case D13_D14: case D13_D15: return 13;
+ /* Q7 */ case D14_D16: return 14;
+ case D15_D16: case D15_D17: return 15;
+ /* Q8 */ case D16_D18: return 16;
+ case D17_D18: case D17_D19: return 17;
+ /* Q9 */ case D18_D20: return 18;
+ case D19_D20: case D19_D21: return 19;
+ /* Q10 */ case D20_D22: return 20;
+ case D21_D22: case D21_D23: return 21;
+ /* Q11 */ case D22_D24: return 22;
+ case D23_D24: case D23_D25: return 23;
+ /* Q12 */ case D24_D26: return 24;
+ case D25_D26: case D25_D27: return 25;
+ /* Q13 */ case D26_D28: return 26;
+ case D27_D28: case D27_D29: return 27;
+ /* Q14 */ case D28_D30: return 28;
+ case D29_D30: case D29_D31: return 29;
+ /* Q15 */
}
}
@@ -237,7 +270,6 @@ namespace ARMII {
inline static const char *AddrModeToString(AddrMode addrmode) {
switch (addrmode) {
- default: llvm_unreachable("Unknown memory operation");
case AddrModeNone: return "AddrModeNone";
case AddrMode1: return "AddrMode1";
case AddrMode2: return "AddrMode2";
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
new file mode 100644
index 000000000000..aa649badaf82
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -0,0 +1,283 @@
+//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+ enum { DefaultEABIVersion = 0x05000000U };
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+
+ public:
+ ARMELFObjectWriter(uint8_t OSABI);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ };
+}
+
+ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
+ ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+
+ARMELFObjectWriter::~ARMELFObjectWriter() {}
+
+// FIXME: get the real EABI Version from the Triple.
+unsigned ARMELFObjectWriter::getEFlags() const {
+ return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
+}
+
+// In ARM, _MergedGlobals and other most symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) { ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return GetRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ llvm_unreachable("unimplemented");
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET1:
+ Type = ELF::R_ARM_TARGET1;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ llvm_unreachable("Unimplemented");
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 350c92decdce..0085feb82069 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,9 @@ enum Fixups {
// the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
+ // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
+ // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ fixup_arm_pcrel_10_unscaled,
// fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
// used in VFP instructions where the lower 2 bits are not encoded
// (so it's encoded as an 8-bit immediate).
@@ -56,6 +59,25 @@ enum Fixups {
// fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
+ // The following fixups handle the ARM BL instructions. These can be
+ // conditionalised; however, the ARM ELF ABI requires a different relocation
+ // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that
+ // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has
+ // no conditional version; R_ARM_JUMP24 would have to insert a veneer.
+ //
+ // MachO does not draw a distinction between the two cases, so it will treat
+ // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
+
+ // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ fixup_arm_uncondbl,
+
+ // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
+ // conditionalisation.
+ fixup_arm_condbl,
+
+ // fixup_arm_blx - Fixup for ARM BLX instructions.
+ fixup_arm_blx,
+
// fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1c109e015280..03e8d5f83ae7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = {
0,0
};
+void ARMMCAsmInfoDarwin::anchor() { }
+
ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
@@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
ExceptionsType = ExceptionHandling::SjLj;
}
+void ARMELFMCAsmInfo::anchor() { }
+
ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 90f7822ea580..f0b289c6f3b6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,11 +18,15 @@
namespace llvm {
- struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit ARMMCAsmInfoDarwin();
};
- struct ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit ARMELFMCAsmInfo();
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e22b842..10d1c48876ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -64,7 +64,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI,
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -118,8 +118,10 @@ public:
/// branch target.
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups) const;
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate
/// ADR label target.
@@ -166,7 +168,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::da: return 0;
case ARM_AM::ia: return 1;
case ARM_AM::db: return 2;
@@ -177,7 +179,6 @@ public:
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
switch (ShOpc) {
- default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::no_shift:
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -185,7 +186,7 @@ public:
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
+ llvm_unreachable("Invalid ShiftOpc!");
}
/// getAddrMode2OpValue - Return encoding for addrmode2 operands.
@@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
llvm_unreachable("Unable to encode MCOperand!");
- return 0;
}
/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
@@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected branch target type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
// All of the information is in the fixup.
return 0;
@@ -594,17 +594,26 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
-getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_condbranch, Fixups);
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_uncondbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbl, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups);
}
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups);
+
return MO.getImm() >> 1;
}
@@ -718,12 +727,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
else
Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
+ // FIXME: Handle #-0.
if (Offset < 0) {
Offset *= -1;
isAdd = false;
@@ -791,8 +801,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else
@@ -833,7 +843,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// but this is good enough for now.
static bool EvaluateAsPCRel(const MCExpr *Expr) {
switch (Expr->getKind()) {
- default: assert(0 && "Unexpected expression type");
+ default: llvm_unreachable("Unexpected expression type");
case MCExpr::SymbolRef: return false;
case MCExpr::Binary: return true;
}
@@ -857,7 +867,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind;
switch (ARM16Expr->getKind()) {
- default: assert(0 && "Unsupported ARMFixup");
+ default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
if (!isTargetDarwin() && EvaluateAsPCRel(E))
Kind = MCFixupKind(isThumb2()
@@ -879,12 +889,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
: ARM::fixup_arm_movw_lo16);
break;
}
- Fixups.push_back(MCFixup::Create(0, E, Kind));
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
};
llvm_unreachable("Unsupported MCExpr type in MCOperand!");
- return 0;
}
uint32_t ARMMCCodeEmitter::
@@ -993,6 +1002,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+
+ // If The first operand isn't a register, we have a label reference.
+ if (!MO.isReg()) {
+ unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ return (Rn << 9) | (1 << 13);
+ }
unsigned Rn = getARMRegisterNumbering(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
@@ -1066,7 +1088,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
else
Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
@@ -1312,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
// LDM/STM:
// {15-0} = Bitfield of GPRs.
unsigned Reg = MI.getOperand(Op).getReg();
- bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
- bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+ bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
unsigned Binary = 0;
@@ -1372,11 +1394,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
switch (Imm.getImm()) {
default: break;
- case 2:
- case 4:
case 8:
- case 16: Align = 0x00; break;
- case 32: Align = 0x03; break;
+ case 16:
+ case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break;
+ case 4: Align = 0x03; break;
}
return RegNo | (Align << 4);
@@ -1412,7 +1434,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 2727ba8c8aa5..22e14a2281de 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
switch (Kind) {
- default: assert(0 && "Invalid kind!");
+ default: llvm_unreachable("Invalid kind!");
case VK_ARM_HI16: OS << ":upper16:"; break;
case VK_ARM_LO16: OS << ":lower16:"; break;
}
@@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
switch (Value->getKind()) {
case MCExpr::Target:
- assert(0 && "Can't handle nested target expr!");
- break;
+ llvm_unreachable("Can't handle nested target expr!");
case MCExpr::Constant:
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 0a2e883deb1d..a727e087d291 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -1,4 +1,4 @@
-//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index a55c41075d40..e3512cda3ae3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -89,14 +89,6 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
ARMArchFeature += ",+thumb-mode";
}
- Triple TheTriple(TT);
- if (TheTriple.getOS() == Triple::NativeClient) {
- if (ARMArchFeature.empty())
- ARMArchFeature = "+nacl-mode";
- else
- ARMArchFeature += ",+nacl-mode";
- }
-
return ARMArchFeature;
}
@@ -137,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -158,22 +151,23 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
- return NULL;
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI, STI);
return 0;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 9b3d3bd32183..88472d7ffc3f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
bool Is64Bit,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e84df1..8057cb6687a6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -13,9 +13,11 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -80,6 +82,9 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_adr_pcrel_12:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
@@ -98,34 +103,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
+ // For movw/movt r_type relocations they always have a pair following them and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ Log2Size = 2;
return true;
}
}
void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = macho::RIT_ARM_Half;
@@ -135,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -148,7 +167,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -178,9 +198,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +216,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
@@ -233,7 +259,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -245,7 +272,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -287,19 +315,21 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = macho::RIT_Vanilla;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
- report_fatal_error("unknown ARM fixup kind!");
- return;
- }
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ // If we failed to get fixup kind info, it's because there's no legal
+ // relocation type for the fixup kind. This happens when it's a fixup that's
+ // expected to always be resolvable at assembly time and not have any
+ // relocations needed.
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
}
@@ -374,6 +404,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt relocation.
+ uint32_t Value = 0;;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
Writer->addRelocation(Fragment->getParent(), MRE);
}
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index adc37cbf582e..256599412e8b 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,19 +1,14 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
+ ARMELFObjectWriter.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMMCTargetDesc.cpp
ARMMachObjectWriter.cpp
+ ARMELFObjectWriter.cpp
)
add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMInfo
- LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..2a7fe6188b50
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDesc
+parent = ARM
+required_libraries = ARMAsmPrinter ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df00538b39f..28998361c7a0 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,4 +1,4 @@
-//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 2f6842e8cb60..3eddda812f84 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions.
//===---------------------------------------------------------------------===//
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting
LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
@@ -699,3 +694,19 @@ test is equality test so it's more a conditional move rather than a select:
Currently this is a ARM specific dag combine. We probably should make it into a
target-neutral one.
+
+//===---------------------------------------------------------------------===//
+
+Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
+are specified to be undefined at zero, so portable code must check for zero
+and handle it as a special case. That is unnecessary on ARM where those
+operations are implemented in a way that is well-defined for zero. For
+example:
+
+int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
+
+should just be implemented with a CLZ instruction. Since there are other
+targets, e.g., PPC, that share this behavior, it would be best to implement
+this in a target-independent way: we should probably fold that (when using
+"undefined at zero" semantics) to set the "defined at zero" bit and have
+the code generator expand out the right code.
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b136ce64..533e747894ca 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..a07a94047d4e
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMInfo
+parent = ARM
+required_libraries = MC Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index d8481778c0da..edd73c20c0be 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1,4 +1,4 @@
-//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -101,7 +100,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -175,14 +174,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
return false;
}
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
@@ -214,7 +213,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
int NumBytes = (int)MFI->getStackSize();
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
@@ -278,8 +277,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill));
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB->copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -350,6 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MIB->copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 218311d78d30..e03e75815c7e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,14 +13,11 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
-#include "Thumb1InstrInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -28,6 +25,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tMOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
@@ -60,8 +66,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -89,8 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 17ef2f758ef4..36af20492d4e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB1INSTRUCTIONINFO_H
#define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
namespace llvm {
@@ -27,6 +26,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482a66fa..ef77bbd21a4e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
@@ -570,6 +570,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
+ UseMI = II;
+ done = true;
+ break;
+ }
if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
@@ -624,6 +629,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
FrameReg = BasePtr;
}
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
@@ -643,14 +663,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +692,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -695,11 +714,11 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else {
- assert(false && "Unexpected opcode!");
+ llvm_unreachable("Unexpected opcode!");
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59e5980..69718424e735 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB1REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b6274007b237..ecb4c2f0e5da 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
Uses.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Uses.insert(*Subreg);
}
@@ -83,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
Defs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -153,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
++I;
if (I != E) {
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
if (NCC == CC || NCC == OCC)
return true;
}
@@ -170,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -206,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MI = NMI;
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
// Add implicit use of ITSTATE.
@@ -237,6 +238,10 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ MachineBasicBlock::instr_iterator LI = LastITMI;
+ finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI));
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf040c822de9..8ab486b0c1bf 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,11 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -35,6 +33,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tNOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
// FIXME
return 0;
@@ -53,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If the first instruction of Tail is predicated, we may have to update
// the IT instruction.
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
MachineBasicBlock::iterator MBBI = Tail;
if (CC != ARMCC::AL)
// Expecting at least the t2IT instruction before it.
@@ -101,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
}
unsigned PredReg = 0;
- return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+ return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -130,8 +135,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -158,8 +162,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -570,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
return;
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return;
@@ -586,10 +589,10 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
continue;
MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
if (!(NCC == CC || NCC == OCC) ||
NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->definesRegister(ARM::CPSR))
+ NMI->modifiesRegister(ARM::CPSR, &TRI))
break;
if (++NumInsts == 4)
// Too many in a row!
@@ -607,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+ return getInstrPredicate(MI, PredReg);
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f2637d7fbcab..0911f8a597ce 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB2INSTRUCTIONINFO_H
#define THUMB2INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb2RegisterInfo.h"
namespace llvm {
@@ -28,6 +27,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 355c3bf0352c..29a87d016227 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb2RegisterInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378aeab4e..6b397e869683 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB2REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 89a155c5a7f5..b5a397e61685 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
- unsigned WideOpc; // Wide opcode
- unsigned NarrowOpc1; // Narrow opcode to transform to
- unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
uint8_t Imm1Limit; // Limit of immediate field (bits)
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -146,7 +146,8 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -157,19 +158,21 @@ namespace {
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef);
+ MachineInstr *CPSRDef, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
@@ -186,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there
/// are indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
- if (!Def || !STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool FirstInSelfLoop) {
+ // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
+ if (!STI->avoidCPSRPartialUpdate())
return false;
+ if (!Def)
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ return FirstInSelfLoop;
+
SmallSet<unsigned, 2> Defs;
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = Def->getOperand(i);
@@ -442,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -468,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -502,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -514,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -522,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -535,12 +545,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
break;
@@ -552,13 +562,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -568,9 +578,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
return false;
@@ -579,14 +589,32 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1) {
+ // t2MUL is "special". The tied source operand is second, not first.
+ if (MI->getOpcode() == ARM::t2MUL) {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
+ || !isARMLowRegister(Reg2))
+ return false;
+ if (Reg0 != Reg2) {
+ // If the other operand also isn't the same as the destination, we
+ // can't reduce.
+ if (Reg1 != Reg0)
+ return false;
+ // Try to commute the operands to make it a 2-address instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ } else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
unsigned CommOpIdx1, CommOpIdx2;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
@@ -637,12 +665,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -666,7 +694,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -674,7 +702,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
@@ -727,12 +756,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -772,7 +801,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -817,13 +846,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ bool IsSelfLoop = MBB.isSuccessor(&MBB);
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -832,9 +870,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Ignore "special" cases for now.
if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -842,31 +880,46 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Try to transform to a 16-bit two-address instruction.
if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
// Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ if (BundleMI->killsRegister(ARM::CPSR))
+ LiveCPSR = false;
+ MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
+ if (MO && !MO->isDead())
+ LiveCPSR = true;
+ }
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall())
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
- else if (DefCPSR)
+ IsSelfLoop = false;
+ } else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ IsSelfLoop = false;
+ }
}
return Modified;