Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--  contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td | 94
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 605
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 470
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 28
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 99
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 27
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h | 71
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.td | 26
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp | 50
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 33
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 75
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFPUName.def | 32
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFPUName.h | 26
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 375
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFeatures.h | 93
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 156
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h | 13
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 562
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 1722
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 72
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrFormats.td | 290
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp | 34
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.td | 885
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrNEON.td | 451
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb.td | 263
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td | 849
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrVFP.td | 348
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 115
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 78
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td | 84
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSchedule.td | 18
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMScheduleA9.td | 196
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td | 944
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp | 120
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 79
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 113
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1429
-rw-r--r--  contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 817
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 284
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 53
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 61
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 735
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 102
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 93
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 16
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp | 43
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 129
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 157
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 73
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 36
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp | 20
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 71
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h | 6
74 files changed, 9562 insertions(+), 4220 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index f0d4dbe2bfb3..ff585b41a2aa 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -615,7 +615,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
bool Modified = false;
- for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
I != E; ++I) {
// Follow the def-use chain for this DPR through COPYs, and also through
// PHIs (which are essentially multi-way COPYs). It is because of PHIs that
@@ -630,7 +630,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
elideCopiesAndPHIs(Def, DefSrcs);
- for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
EE = DefSrcs.end(); II != EE; ++II) {
MachineInstr *MI = *II;
@@ -655,8 +655,15 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
if (NewReg != 0) {
Modified = true;
- for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
+ // Make sure to constrain the register class of the new register to
+ // match what we're replacing. Otherwise we can optimize a DPR_VFP2
+ // reference into a plain DPR, and that will end poorly. NewReg is
+ // always virtual here, so there will always be a matching subclass
+ // to find.
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+
DEBUG(dbgs() << "Replacing operand "
<< **I << " with "
<< PrintReg(NewReg) << "\n");
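
A note on the constrainRegClass call added above: NewReg is created with the widest D-register class, but the operand it replaces may have required a subclass such as DPR_VFP2 (D0-D15 only). The sketch below models the idea with register classes as plain sizes; it is illustrative C++, not the real MachineRegisterInfo API, and the class names are stand-ins.

#include <cassert>

// Toy register classes: DPR covers D0-D31, DPR_VFP2 only D0-D15. The class
// with fewer registers is the stricter one.
struct RegClass { unsigned NumRegs; };

// Models MRI->constrainRegClass: keep the stricter of the two classes.
// (The real API walks the subclass lattice and can fail; since NewReg is
// virtual here, a matching subclass always exists, as the comment says.)
const RegClass *constrain(const RegClass *Cur, const RegClass *Required) {
  return Cur->NumRegs <= Required->NumRegs ? Cur : Required;
}

int main() {
  RegClass DPR{32}, DPR_VFP2{16};
  // Without the constraint, a D16-D31 register could be substituted into a
  // VFP2-only instruction, which "will end poorly", as noted above.
  assert(constrain(&DPR, &DPR_VFP2) == &DPR_VFP2);
  return 0;
}
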
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 2d7470919dc4..36e5680ca4e0 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -38,12 +38,16 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
- "Does not support ARM mode execution">;
+ "Does not support ARM mode execution",
+ [ModeThumb]>;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
"Enable VFP4 instructions",
[FeatureVFP3, FeatureFP16]>;
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
+ "true", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
@@ -59,8 +63,15 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for Cryptography extensions",
+ [FeatureNEON]>;
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -108,10 +119,24 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
-// M-series ISA?
-def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
+def FeatureVirtualization : SubtargetFeature<"virtualization",
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDiv, FeatureHWDivARM]>;
+
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
"Is microcontroller profile ('M' series)">;
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
+
+// A-series ISA
+def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
+ "Is application profile ('A' series)">;
+
// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
// See ARMInstrInfo.td for details.
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
@@ -129,12 +154,19 @@ def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
"Support ARM v6 instructions",
[HasV5TEOps]>;
+def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
+ "Support ARM v6M instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6Ops, FeatureThumb2]>;
+ [HasV6MOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
- [HasV6T2Ops]>;
+ [HasV6T2Ops, FeaturePerfMon]>;
+def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
+ "Support ARM v8 instructions",
+ [HasV7Ops, FeatureVirtualization,
+ FeatureMP]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -170,12 +202,27 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureFP16,
+ [FeatureT2XtPk, FeatureVFP4,
+ FeatureMP, FeatureHWDiv, FeatureHWDivARM,
FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
+ FeatureTrustZone, FeatureVirtualization]>;
+
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+ "Cortex-A53 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+ "Cortex-A57 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
- [FeatureSlowFPBrcc, FeatureHWDivARM,
+ [FeatureSlowFPBrcc,
+ FeatureHWDiv, FeatureHWDivARM,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR,
FeatureT2XtPk]>;
@@ -233,7 +280,7 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
// V6T2 Processors.
@@ -248,26 +295,30 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
def : ProcessorModel<"cortex-a5", CortexA8Model,
[ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
// FIXME: A15 has currently the same ProcessorModel as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureD16, FeatureRClass]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
@@ -279,13 +330,22 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureMClass]>;
+ FeatureVFPOnlySP, FeatureD16,
+ FeatureMClass]>;
// Swift uArch Processors.
def : ProcessorModel<"swift", SwiftModel,
[ProcSwift, HasV7Ops, FeatureNEON,
FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
+
+// V8 Processors
+def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
+def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
//===----------------------------------------------------------------------===//
// Register File Description
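
The SubtargetFeature definitions above form an implication graph: requesting "v8" transitively enables v7, perfmon, virtualization, both hardware-divide flavours, mp, thumb2, and everything below v6t2. A small self-contained sketch of that closure follows; the feature names are copied from the .td entries, but the worklist loop is only an illustration of the semantics, not TableGen's actual resolution code.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Direct implications, mirroring the [Feature...] lists in the .td file.
  std::map<std::string, std::vector<std::string>> Implies = {
      {"v8", {"v7", "virtualization", "mp"}},
      {"v7", {"v6t2", "perfmon"}},
      {"v6t2", {"v6m", "thumb2"}},
      {"v6m", {"v6"}},
      {"virtualization", {"hwdiv", "hwdiv-arm"}},
  };
  std::set<std::string> Enabled;
  std::vector<std::string> Work = {"v8"};
  while (!Work.empty()) {  // transitive closure over the implication edges
    std::string F = Work.back();
    Work.pop_back();
    if (!Enabled.insert(F).second)
      continue;
    for (const std::string &Dep : Implies[F])
      Work.push_back(Dep);
  }
  for (const std::string &F : Enabled)
    std::cout << F << "\n";  // everything "+v8" drags in
  return 0;
}
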
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 13ec2087938a..e79f88d4b6f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "ARM.h"
#include "ARMBuildAttrs.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFPUName.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -55,235 +56,67 @@
#include <cctype>
using namespace llvm;
-namespace {
-
- // Per section and per symbol attributes are not supported.
- // To implement them we would need the ability to delay this emission
- // until the assembly file is fully parsed/generated as only then do we
- // know the symbol and section numbers.
- class AttributeEmitter {
- public:
- virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
- virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
- virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
- virtual void Finish() = 0;
- virtual ~AttributeEmitter() {}
- };
-
- class AsmAttributeEmitter : public AttributeEmitter {
- MCStreamer &Streamer;
-
- public:
- AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
- void MaybeSwitchVendor(StringRef Vendor) { }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- Streamer.EmitRawText("\t.eabi_attribute " +
- Twine(Attribute) + ", " + Twine(Value));
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- switch (Attribute) {
- default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
- case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
- break;
- /* GAS requires .fpu to be emitted regardless of EABI attribute */
- case ARMBuildAttrs::Advanced_SIMD_arch:
- case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
- break;
- }
- }
- void Finish() { }
- };
-
- class ObjectAttributeEmitter : public AttributeEmitter {
- // This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
- // in declaration order, keeping all in the same vector
- struct AttributeItemType {
- enum {
- HiddenAttribute = 0,
- NumericAttribute,
- TextAttribute
- } Type;
- unsigned Tag;
- unsigned IntValue;
- StringRef StringValue;
- } AttributeItem;
-
- MCObjectStreamer &Streamer;
- StringRef CurrentVendor;
- SmallVector<AttributeItemType, 64> Contents;
-
- // Account for the ULEB/String size of each item,
- // not just the number of items
- size_t ContentsSize;
- // FIXME: this should be in a more generic place, but
- // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
- size_t getULEBSize(int Value) {
- size_t Size = 0;
- do {
- Value >>= 7;
- Size += sizeof(int8_t); // Is this really necessary?
- } while (Value);
- return Size;
- }
-
- public:
- ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
- Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
-
- void MaybeSwitchVendor(StringRef Vendor) {
- assert(!Vendor.empty() && "Vendor cannot be empty.");
-
- if (CurrentVendor.empty())
- CurrentVendor = Vendor;
- else if (CurrentVendor == Vendor)
- return;
- else
- Finish();
-
- CurrentVendor = Vendor;
-
- assert(Contents.size() == 0);
- }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- AttributeItemType attr = {
- AttributeItemType::NumericAttribute,
- Attribute,
- Value,
- StringRef("")
- };
- ContentsSize += getULEBSize(Attribute);
- ContentsSize += getULEBSize(Value);
- Contents.push_back(attr);
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- AttributeItemType attr = {
- AttributeItemType::TextAttribute,
- Attribute,
- 0,
- String
- };
- ContentsSize += getULEBSize(Attribute);
- // String + \0
- ContentsSize += String.size()+1;
-
- Contents.push_back(attr);
- }
-
- void Finish() {
- // Vendor size + Vendor name + '\0'
- const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
-
- // Tag + Tag Size
- const size_t TagHeaderSize = 1 + 4;
-
- Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor);
- Streamer.EmitIntValue(0, 1); // '\0'
-
- Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
- Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
-
- // Size should have been accounted for already, now
- // emit each field as its type (ULEB or String)
- for (unsigned int i=0; i<Contents.size(); ++i) {
- AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag);
- switch (item.Type) {
- default: llvm_unreachable("Invalid attribute type");
- case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue);
- break;
- case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper());
- Streamer.EmitIntValue(0, 1); // '\0'
- break;
- }
- }
-
- Contents.clear();
- }
- };
-
-} // end of anonymous namespace
-
-MachineLocation ARMAsmPrinter::
-getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
- if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- else {
- DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
- }
- return Location;
-}
-
/// EmitDwarfRegOp - Emit dwarf register operation.
-void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
+ bool Indirect) const {
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
- AsmPrinter::EmitDwarfRegOp(MLoc);
- else {
- unsigned Reg = MLoc.getReg();
- if (Reg >= ARM::S0 && Reg <= ARM::S31) {
- assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
- // S registers are described as bit-pieces of a register
- // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
- // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
-
- unsigned SReg = Reg - ARM::S0;
- bool odd = SReg & 0x1;
- unsigned Rx = 256 + (SReg >> 1);
-
- OutStreamer.AddComment("DW_OP_regx for S register");
- EmitInt8(dwarf::DW_OP_regx);
-
- OutStreamer.AddComment(Twine(SReg));
- EmitULEB128(Rx);
-
- if (odd) {
- OutStreamer.AddComment("DW_OP_bit_piece 32 32");
- EmitInt8(dwarf::DW_OP_bit_piece);
- EmitULEB128(32);
- EmitULEB128(32);
- } else {
- OutStreamer.AddComment("DW_OP_bit_piece 32 0");
- EmitInt8(dwarf::DW_OP_bit_piece);
- EmitULEB128(32);
- EmitULEB128(0);
- }
- } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
- assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
- // Q registers Q0-Q15 are described by composing two D registers together.
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
- // DW_OP_piece(8)
-
- unsigned QReg = Reg - ARM::Q0;
- unsigned D1 = 256 + 2 * QReg;
- unsigned D2 = D1 + 1;
-
- OutStreamer.AddComment("DW_OP_regx for Q register: D1");
- EmitInt8(dwarf::DW_OP_regx);
- EmitULEB128(D1);
- OutStreamer.AddComment("DW_OP_piece 8");
- EmitInt8(dwarf::DW_OP_piece);
- EmitULEB128(8);
-
- OutStreamer.AddComment("DW_OP_regx for Q register: D2");
- EmitInt8(dwarf::DW_OP_regx);
- EmitULEB128(D2);
- OutStreamer.AddComment("DW_OP_piece 8");
- EmitInt8(dwarf::DW_OP_piece);
- EmitULEB128(8);
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) {
+ AsmPrinter::EmitDwarfRegOp(MLoc, Indirect);
+ return;
+ }
+ assert(MLoc.isReg() && !Indirect &&
+ "This doesn't support offset/indirection - implement it if needed");
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
}
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+ // DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
}
}
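
The S-register rules quoted in the comments above are easy to misapply by one bit offset, so here is the mapping as a runnable arithmetic check. This is pure computation with printf, no DWARF machinery involved; 256 is the DWARF register-number base for the D registers, as in the code.

#include <cstdio>

int main() {
  // S[2x]   -> DW_OP_regx(256 + x), DW_OP_bit_piece(32, 0)
  // S[2x+1] -> DW_OP_regx(256 + x), DW_OP_bit_piece(32, 32)
  for (unsigned SReg : {0u, 1u, 4u, 5u, 31u}) {
    unsigned Rx = 256 + (SReg >> 1);           // enclosing D register
    unsigned BitOffset = (SReg & 1) ? 32 : 0;  // low or high half
    std::printf("S%-2u = DW_OP_regx(%u) DW_OP_bit_piece(32, %u)\n",
                SReg, Rx, BitOffset);
  }
  // e.g. S5 lives in the top half of D2: DW_OP_regx(258) bit_piece(32, 32)
  return 0;
}
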
@@ -312,7 +145,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
(Subtarget->isTargetDarwin()
? MCSymbolRefExpr::VK_None
: MCSymbolRefExpr::VK_ARM_TARGET1),
@@ -373,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
(TF & ARMII::MO_HI16))
O << ":upper16:";
- O << *Mang->getSymbol(GV);
+ O << *getSymbol(GV);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -474,8 +307,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// This takes advantage of the 2 operand-ness of ldm/stm and that we've
// already got the operands in registers that are operands to the
// inline asm statement.
-
- O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+ O << "{";
+ if (ARM::GPRPairRegClass.contains(RegBegin)) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
+ O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
+ RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
+ }
+ O << ARMInstPrinter::getRegisterName(RegBegin);
// FIXME: The register allocator not only may not have given us the
// registers in sequence, but may not be in ascending registers. This
@@ -500,7 +339,38 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!FlagsOP.isImm())
return true;
unsigned Flags = FlagsOP.getImm();
+
+ // This operand may not be the one that actually provides the register. If
+ // it's tied to a previous one then we should refer instead to that one
+ // for registers and their classes.
+ unsigned TiedIdx;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+ for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
+ unsigned OpFlags = MI->getOperand(OpNum).getImm();
+ OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+ Flags = MI->getOperand(OpNum).getImm();
+
+ // Later code expects OpNum to be pointing at the register rather than
+ // the flags.
+ OpNum += 1;
+ }
+
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ unsigned RC;
+ InlineAsm::hasRegClassConstraint(Flags, RC);
+ if (RC == ARM::GPRPairRegClassID) {
+ if (NumVals != 1)
+ return true;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+ ARM::gsub_0 : ARM::gsub_1);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
if (NumVals != 2)
return true;
unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
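
The tied-operand walk above deserves a gloss: inline-asm operands are stored flat, one flags word per group followed by that group's register operands, so resolving "tied to def K" means hopping over K whole groups. The toy below reproduces just that indexing with plain integers standing in for MachineOperands; the group sizes are made up.

#include <cassert>
#include <vector>

int main() {
  // One entry per operand group: how many register operands follow its
  // flags word in the flattened list.
  std::vector<unsigned> NumRegs = {1, 2, 1, 1};
  unsigned TiedIdx = 2;  // this use is tied to def group #2
  unsigned OpNum = 0;    // position of group 0's flags word
  for (unsigned G = 0; G < TiedIdx; ++G)
    OpNum += NumRegs[G] + 1;  // skip the flags word plus its registers
  // Groups 0 and 1 occupy slots 0-1 and 2-4, so group 2's flags sit at 5;
  // as in the patch, OpNum + 1 is then the register operand itself.
  assert(OpNum == 5);
  return 0;
}
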
@@ -704,11 +574,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -718,145 +583,150 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-void ARMAsmPrinter::emitAttributes() {
-
- emitARMAttributeSection();
-
- /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
- bool emitFPU = false;
- AttributeEmitter *AttrEmitter;
- if (OutStreamer.hasRawTextSupport()) {
- AttrEmitter = new AsmAttributeEmitter(OutStreamer);
- emitFPU = true;
- } else {
- MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
- AttrEmitter = new ObjectAttributeEmitter(O);
- }
-
- AttrEmitter->MaybeSwitchVendor("aeabi");
-
- std::string CPUString = Subtarget->getCPUString();
-
- if (CPUString == "cortex-a8" ||
- Subtarget->isCortexA8()) {
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- // Fixme: figure out when this is emitted.
- //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
- // ARMBuildAttrs::AllowWMMXv1);
- //
-
- /// ADD additional Else-cases here!
- } else if (CPUString == "xscale") {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- } else if (CPUString == "generic") {
- // For a generic CPU, we assume a standard v7a architecture in Subtarget.
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- } else if (Subtarget->hasV7Ops()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
+static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
+ const ARMSubtarget *Subtarget) {
+ if (CPU == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (Subtarget->hasV8Ops())
+ return ARMBuildAttrs::v8;
+ else if (Subtarget->hasV7Ops()) {
+ if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ return ARMBuildAttrs::v6T2;
+ else if (Subtarget->hasV6MOps())
+ return ARMBuildAttrs::v6S_M;
else if (Subtarget->hasV6Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ return ARMBuildAttrs::v6;
else if (Subtarget->hasV5TEOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ return ARMBuildAttrs::v5TE;
else if (Subtarget->hasV5TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ return ARMBuildAttrs::v5T;
else if (Subtarget->hasV4TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
- if (Subtarget->hasNEON() && emitFPU) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasVFP4())
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- "neon-vfpv4");
- else
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
- /* If emitted for NEON, omit from VFP below, since you can have both
- * NEON and VFP in build attributes but only one .fpu */
- emitFPU = false;
+void ARMAsmPrinter::emitAttributes() {
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ ATS.switchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString != "generic")
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
+ getArchForCPU(CPUString, Subtarget));
+
+ if (Subtarget->isAClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (Subtarget->isRClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (Subtarget->isMClass()){
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
}
- /* VFPv4 + .fpu */
- if (Subtarget->hasVFP4()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv4A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
-
- /* VFPv3 + .fpu */
- } else if (Subtarget->hasVFP3()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv3A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
-
- /* VFPv2 + .fpu */
- } else if (Subtarget->hasVFP2()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv2);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
+ ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
+ if (Subtarget->isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (Subtarget->hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
}
- /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
- * since NEON can have 1 (allowed) or 2 (MAC operations) */
if (Subtarget->hasNEON()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::Allowed);
+ /* NEON is not exactly a VFP architecture, but GAS emits one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasCrypto())
+ ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
+ else
+ ATS.emitFPU(ARM::NEON_FP_ARMV8);
+ }
+ else if (Subtarget->hasVFP4())
+ ATS.emitFPU(ARM::NEON_VFPV4);
+ else
+ ATS.emitFPU(ARM::NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (Subtarget->hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (Subtarget->hasFPARMv8())
+ ATS.emitFPU(ARM::FP_ARMV8);
+ else if (Subtarget->hasVFP4())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
+ else if (Subtarget->hasVFP3())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
+ else if (Subtarget->hasVFP2())
+ ATS.emitFPU(ARM::VFPV2);
}
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
}
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
// FIXME: add more flags to ARMBuildAttrs.h
// 8-bytes alignment stuff.
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (Subtarget->isFPOnlySP())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
- }
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
+
// FIXME: Should we signal R9 usage?
- if (Subtarget->hasDivide())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+ if (Subtarget->hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (Subtarget->hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ if (Subtarget->hasDivide()) {
+ // Check if hardware divide is only available in thumb2 or ARM as well.
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use,
+ Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt :
+ ARMBuildAttrs::AllowDIVIfExists);
+ }
- AttrEmitter->Finish();
- delete AttrEmitter;
+ if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (Subtarget->hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+
+ ATS.finishAttributeSection();
}
void ARMAsmPrinter::emitARMAttributeSection() {
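
getArchForCPU collapses what used to be a per-CPU if/else chain into a single ladder keyed off subtarget feature bits. Below is a compressed restatement of that ladder, self-contained and with string tags standing in for the ARMBuildAttrs enum values:

#include <cstdio>
#include <string>

struct Features { bool V8, V7, MClass, Thumb2DSP, V6T2, V6M, V6; };

const char *archAttr(const std::string &CPU, const Features &F) {
  if (CPU == "xscale") return "v5TEJ";
  if (F.V8) return "v8";
  if (F.V7) return (F.MClass && F.Thumb2DSP) ? "v7E_M" : "v7";
  if (F.V6T2) return "v6T2";
  if (F.V6M) return "v6S_M";
  return F.V6 ? "v6" : "pre-v6";  // the real ladder continues down to v4
}

int main() {
  // A v7-M part with DSP (something cortex-m4-like) lands on v7E_M.
  Features M4 = {false, true, true, true, true, true, true};
  std::printf("%s\n", archAttr("cortex-m4", M4));
  return 0;
}
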
@@ -908,7 +778,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
if (!isIndirect)
- return Mang->getSymbol(GV);
+ return getSymbol(GV);
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -919,7 +789,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
MMIMachO.getGVStubEntry(MCSym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
return MCSym;
}
@@ -1092,27 +962,12 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
-void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &OS) {
- unsigned NOps = MI->getNumOperands();
- assert(NOps==4);
- OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- OS << V.getName();
- OS << " <- ";
- // Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
- OS << ']';
- OS << "+";
- printOperand(MI, NOps-2, OS);
-}
-
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
@@ -1175,7 +1030,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
RegList.push_back(SrcReg);
break;
}
- OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
} else {
// Changes of stack / frame pointer.
if (SrcReg == ARM::SP) {
@@ -1223,11 +1078,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
// Set-up of the frame pointer. Positive values correspond to "add"
// instruction.
- OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
else if (DstReg == ARM::SP) {
// Change of SP by an offset. Positive values correspond to "sub"
// instruction.
- OutStreamer.EmitPad(Offset);
+ ATS.emitPad(Offset);
} else {
MI->dump();
llvm_unreachable("Unsupported opcode for unwinding information");
@@ -1272,15 +1127,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
switch (Opc) {
case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
- case ARM::DBG_VALUE: {
- if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- SmallString<128> TmpStr;
- raw_svector_ostream OS(TmpStr);
- PrintDebugValueComment(MI, OS);
- OutStreamer.EmitRawText(StringRef(OS.str()));
- }
- return;
- }
+ case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing");
case ARM::LEApcrel:
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
@@ -1376,7 +1223,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index c945e4f28699..de72e063e0d5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -97,13 +97,9 @@ private:
const MachineInstr *MI);
public:
- void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
-
- virtual MachineLocation
- getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
-
/// EmitDwarfRegOp - Emit dwarf register operation.
- virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE;
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const
+ LLVM_OVERRIDE;
virtual unsigned getISAEncoding() LLVM_OVERRIDE {
// ARM/Darwin adds ISA to the DWARF info for each function.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 60050542716c..f835a4e5b5fe 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,7 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
using namespace llvm;
@@ -113,8 +114,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
- return (ScheduleHazardRecognizer *)
- new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -273,104 +273,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
+ TBB = 0;
+ FBB = 0;
+
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
- return false;
+ return false; // Empty blocks are easy.
--I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
- unsigned LastOpc = LastInst->getOpcode();
- // Check if it's an indirect branch first, this should return 'unanalyzable'
- // even if it's predicated.
- if (isIndirectBranchOpcode(LastOpc))
- return true;
+ // Walk backwards from the end of the basic block until the branch is
+ // analyzed or we give up.
+ while (isPredicated(I) || I->isTerminator()) {
- if (!isUnpredicatedTerminator(I))
- return false;
+ // Flag to be raised on unanalyzeable instructions. This is useful in cases
+ // where we want to clean up on the end of the basic block before we bail
+ // out.
+ bool CantAnalyze = false;
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranchOpcode(LastOpc)) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
+ // Skip over DEBUG values and predicated nonterminators.
+ while (I->isDebugValue() || !I->isTerminator()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
}
- if (isCondBranchOpcode(LastOpc)) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(0).getMBB();
- Cond.push_back(LastInst->getOperand(1));
- Cond.push_back(LastInst->getOperand(2));
- return false;
+
+ if (isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode())) {
+ // Indirect branches and jump tables can't be analyzed, but we still want
+ // to clean up any instructions at the tail of the basic block.
+ CantAnalyze = true;
+ } else if (isUncondBranchOpcode(I->getOpcode())) {
+ TBB = I->getOperand(0).getMBB();
+ } else if (isCondBranchOpcode(I->getOpcode())) {
+ // Bail out if we encounter multiple conditional branches.
+ if (!Cond.empty())
+ return true;
+
+ assert(!FBB && "FBB should have been null.");
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(I->getOperand(1));
+ Cond.push_back(I->getOperand(2));
+ } else if (I->isReturn()) {
+ // Returns can't be analyzed, but we should run cleanup.
+ CantAnalyze = !isPredicated(I);
+ } else {
+ // We encountered other unrecognized terminator. Bail out immediately.
+ return true;
}
- return true; // Can't handle indirect branch.
- }
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
- // If AllowModify is true and the block ends with two or more unconditional
- // branches, delete all but the first unconditional branch.
- if (AllowModify && isUncondBranchOpcode(LastOpc)) {
- while (isUncondBranchOpcode(SecondLastOpc)) {
- LastInst->eraseFromParent();
- LastInst = SecondLastInst;
- LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- // Return now the only terminator is an unconditional branch.
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else {
- SecondLastInst = I;
- SecondLastOpc = SecondLastInst->getOpcode();
+ // Cleanup code - to be run for unpredicated unconditional branches and
+ // returns.
+ if (!isPredicated(I) &&
+ (isUncondBranchOpcode(I->getOpcode()) ||
+ isIndirectBranchOpcode(I->getOpcode()) ||
+ isJumpTableBranchOpcode(I->getOpcode()) ||
+ I->isReturn())) {
+ // Forget any previous conditional branch information - it no longer applies.
+ Cond.clear();
+ FBB = 0;
+
+ // If we can modify the function, delete everything below this
+ // unconditional branch.
+ if (AllowModify) {
+ MachineBasicBlock::iterator DI = llvm::next(I);
+ while (DI != MBB.end()) {
+ MachineInstr *InstToDelete = DI;
+ ++DI;
+ InstToDelete->eraseFromParent();
+ }
}
}
- }
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a B and a Bcc, handle it.
- if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- Cond.push_back(SecondLastInst->getOperand(1));
- Cond.push_back(SecondLastInst->getOperand(2));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
+ if (CantAnalyze)
+ return true;
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
+ if (I == MBB.begin())
+ return false;
- // ...likewise if it ends with a branch table followed by an unconditional
- // branch. The branch folder can create these, and we must get rid of them for
- // correctness of Thumb constant islands.
- if ((isJumpTableBranchOpcode(SecondLastOpc) ||
- isIndirectBranchOpcode(SecondLastOpc)) &&
- isUncondBranchOpcode(LastOpc)) {
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return true;
+ --I;
}
- // Otherwise, can't handle this.
- return true;
+ // We made it past the terminators without bailing out - we must have
+ // analyzed this branch successfully.
+ return false;
}
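
Since the rewritten AnalyzeBranch replaces the old last-two-terminators special cases with a single backwards scan, a skeleton of the new control flow may help. The model below runs on a toy instruction type, assumes AllowModify, and omits debug-value skipping; -1 stands for a null block pointer. It is a simplification of the loop above, not a drop-in.

#include <vector>

enum Kind { Normal, Uncond, Cond, Indirect, Return };
struct Inst { Kind K; bool Predicated; int Target; };

// Returns true when the block is unanalyzable, false otherwise.
bool analyzeBranch(std::vector<Inst> &BB, int &TBB, int &FBB, bool &HasCond) {
  TBB = FBB = -1;
  HasCond = false;
  for (int i = (int)BB.size() - 1; i >= 0; --i) {
    Inst &I = BB[i];
    if (!I.Predicated && I.K == Normal)
      return false;                   // ran out of terminators: done
    bool CantAnalyze = false;
    if (I.K == Indirect) {
      CantAnalyze = true;             // still clean up the tail below
    } else if (I.K == Uncond) {
      TBB = I.Target;
    } else if (I.K == Cond) {
      if (HasCond)
        return true;                  // two conditional branches: bail
      FBB = TBB;
      TBB = I.Target;
      HasCond = true;
    } else if (I.K == Return) {
      CantAnalyze = !I.Predicated;
    }
    // An unpredicated uncond branch/indirect branch/return makes everything
    // below it dead and invalidates any condition collected so far.
    if (!I.Predicated && (I.K == Uncond || I.K == Indirect || I.K == Return)) {
      HasCond = false;
      FBB = -1;
      BB.resize(i + 1);               // models the AllowModify erase loop
    }
    if (CantAnalyze)
      return true;
  }
  return false;                       // reached the top: fully analyzed
}

int main() {
  // Fall-through-style block: conditional branch to 1, then uncond to 2.
  std::vector<Inst> BB = {{Normal, false, 0}, {Cond, false, 1}, {Uncond, false, 2}};
  int TBB, FBB;
  bool HasCond;
  return analyzeBranch(BB, TBB, FBB, HasCond);  // TBB==1, FBB==2, HasCond set
}
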
@@ -535,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
- ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
- return AFI->isThumb2Function();
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().restrictIT())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
}
+
return true;
}
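
restrictIT reflects ARMv8's tightening of IT blocks; the actual predicate is isV8EligibleForIT from the newly added ARMFeatures.h (93 lines in the diffstat above). The stand-in below only captures the rough shape of such a check - the real rules are per-opcode - so treat both the struct and the predicate as assumptions for illustration:

// Rough sketch only: ARMv8 deprecates IT blocks whose body uses 32-bit
// encodings or touches the PC; isV8EligibleForIT checks concrete opcodes.
struct ToyInst { bool Is32BitEncoding; bool WritesPC; };

static bool v8EligibleForIT(const ToyInst &I) {
  return !I.Is32BitEncoding && !I.WritesPC;
}

int main() {
  ToyInst Adds16 = {false, false};      // plausible inside a v8 IT block
  ToyInst BranchViaPC = {false, true};  // not eligible under restricted IT
  return (v8EligibleForIT(Adds16) && !v8EligibleForIT(BranchViaPC)) ? 0 : 1;
}
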
@@ -660,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
- bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ .addReg(SrcReg, getKillRegState(KillSrc))));
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
- bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
@@ -698,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Spacing = 1;
// Use VORRq when possible.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 2;
+ } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 4;
// Fall back to VMOVD.
- else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
- else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
- else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
- else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
-
- else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
- else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
- else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+ } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+ BeginIdx = ARM::gsub_0;
+ SubRegs = 2;
+ } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ Spacing = 2;
+ } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ Spacing = 2;
+ } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ Spacing = 2;
+ }
assert(Opc && "Impossible reg-to-reg copy");
@@ -726,26 +739,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
- BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
- Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src);
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
Mov = AddDefaultPred(Mov);
+ // MOVr can set CC.
+ if (Opc == ARM::MOVr)
+ Mov = AddDefaultCC(Mov);
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
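
The BeginIdx/Spacing flip above is compact but easy to misread. Here is the same arithmetic on a plain array, with ints standing in for D registers, showing what would go wrong if an overlapping tuple copy ran in forward order:

#include <cassert>

int main() {
  // Contents of consecutive D registers D0..D3. Copy the pair at D0 into the
  // overlapping pair at D1: the destination's first sub-register is the
  // source's second, which is exactly the case the overlap check catches.
  int D[4] = {10, 11, 12, 13};
  int SrcBase = 0, DstBase = 1, SubRegs = 2, Spacing = 1, BeginIdx = 0;

  bool FirstDstOverlapsSrc = DstBase > SrcBase && DstBase < SrcBase + SubRegs;
  if (FirstDstOverlapsSrc) {
    BeginIdx = BeginIdx + (SubRegs - 1) * Spacing;  // same arithmetic as above
    Spacing = -Spacing;                             // walk backwards
  }
  for (int i = 0; i != SubRegs; ++i)
    D[DstBase + BeginIdx + i * Spacing] = D[SrcBase + BeginIdx + i * Spacing];

  // Forward order would have copied 10 into D1 and then 10 again into D2.
  assert(D[1] == 10 && D[2] == 11);
  return 0;
}
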
@@ -1214,16 +1229,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
return true;
}
-MachineInstr*
-ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx, uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
- .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
- return &*MIB;
-}
-
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
@@ -1426,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1445,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1493,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
if ((Offset2 - Offset1) / 8 > 64)
return false;
- if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ // Check if the machine opcodes are different. If they are different then
+ // we consider the loads not to share a base address, EXCEPT in the case
+ // of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. Those
+ // are considered the same because they are two encoding forms of the
+ // same basic instruction.
+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+ !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+ (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
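
The symmetric opcode comparison above reads more clearly when factored into a helper; the following self-contained restatement uses a toy opcode enum but keeps the logic of the new condition exactly:

#include <cassert>

enum Opc { t2LDRi12, t2LDRBi8, t2LDRBi12 };  // toy subset of the opcodes

bool sameLoadKind(Opc A, Opc B) {
  if (A == B)
    return true;
  // i8 and i12 byte loads are two encodings of one basic instruction.
  return (A == t2LDRBi8 && B == t2LDRBi12) ||
         (A == t2LDRBi12 && B == t2LDRBi8);
}

int main() {
  assert(sameLoadKind(t2LDRBi8, t2LDRBi12));
  assert(!sameLoadKind(t2LDRi12, t2LDRBi8));
  return 0;
}
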
@@ -1708,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
@@ -1716,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
return 0;
+ // Find new register class to use.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return 0;
+
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- DefMI->getDesc(),
- MI->getOperand(0).getReg());
+ DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1743,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
- MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
@@ -1803,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ return;
+ }
+
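  // Illustrative sketch (not part of this patch): with NumBytes == 0 this
  // emits "mov r0, sp" rather than the equivalent "add r0, sp, #0".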
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -1826,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
+bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned NumBytes) {
+  // This optimisation potentially adds lots of load and store
+  // micro-operations, so it's only really a benefit to code-size.
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ return false;
+
+ // If only one register is pushed/popped, LLVM can use an LDR/STR
+ // instead. We can't modify those so make sure we're dealing with an
+ // instruction we understand.
+ bool IsPop = isPopOpcode(MI->getOpcode());
+ bool IsPush = isPushOpcode(MI->getOpcode());
+ if (!IsPush && !IsPop)
+ return false;
+
+ bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD;
+ bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+ MI->getOpcode() == ARM::tPOP ||
+ MI->getOpcode() == ARM::tPOP_RET;
+
+ assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getOperand(1).getReg() == ARM::SP)) &&
+ "trying to fold sp update into non-sp-updating push/pop");
+
+  // The VFP push & pop act on D-registers, so we can only fold in an
+  // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
+  // Don't try if this is violated.
+ if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+ return false;
+
+ // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
+ // pred) so the list starts at 4. Thumb1 starts after the predicate.
+ int RegListIdx = IsT1PushPop ? 2 : 4;
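  // Illustrative sketch of the operand layout assumed here (not part of
  // this patch), e.g. for "push {r4, lr}" as t2STMDB_UPD:
  //   %SP<def>, %SP, pred:14, pred:%noreg, %R4, %LR   ; reglist starts at 4
  // and for Thumb1 tPUSH:
  //   pred:14, pred:%noreg, %R4, %LR                  ; reglist starts at 2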
+
+ // Calculate the space we'll need in terms of registers.
+ unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+ unsigned RD0Reg, RegsNeeded;
+ if (IsVFPPushPop) {
+ RD0Reg = ARM::D0;
+ RegsNeeded = NumBytes / 8;
+ } else {
+ RD0Reg = ARM::R0;
+ RegsNeeded = NumBytes / 4;
+ }
+
+ // We're going to have to strip all list operands off before
+ // re-adding them since the order matters, so save the existing ones
+ // for later.
+ SmallVector<MachineOperand, 4> RegList;
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ RegList.push_back(MI->getOperand(i));
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+ // Now try to find enough space in the reglist to allocate NumBytes.
+ for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+ --CurReg) {
+ if (!IsPop) {
+      // Pushing any register is completely harmless; mark the
+      // register involved as undef since we don't care about its
+      // value in the slightest.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+ false, false, true));
+ --RegsNeeded;
+ continue;
+ }
+
+ // However, we can only pop an extra register if it's not live. For
+ // registers live within the function we might clobber a return value
+ // register; the other way a register can be live here is if it's
+ // callee-saved.
+ if (isCalleeSavedRegister(CurReg, CSRegs) ||
+ MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
+ // VFP pops don't allow holes in the register list, so any skip is fatal
+ // for our transformation. GPR pops do, so we should just keep looking.
+ if (IsVFPPushPop)
+ return false;
+ else
+ continue;
+ }
+
+ // Mark the unimportant registers as <def,dead> in the POP.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+ true));
+ --RegsNeeded;
+ }
+
+ if (RegsNeeded > 0)
+ return false;
+
+ // Finally we know we can profitably perform the optimisation so go
+ // ahead: strip all existing registers off and add them back again
+ // in the right order.
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ MI->RemoveOperand(i);
+
+ // Add the complete list back in.
+ MachineInstrBuilder MIB(MF, &*MI);
+ for (int i = RegList.size() - 1; i >= 0; --i)
+ MIB.addOperand(RegList[i]);
+
+ return true;
+}
+
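// Illustrative before/after sketch (not part of this patch), assuming a
// minsize function where r2 and r3 are free:
//   push {r4, lr}; sub sp, sp, #8   =>   push {r2, r3, r4, lr} ; r2/r3 undef
// and in the epilogue:
//   add sp, sp, #8; pop {r4, pc}    =>   pop {r2, r3, r4, pc}  ; r2/r3 dead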
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
@@ -2232,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
isSafe = true;
break;
}
- // Condition code is after the operand before CPSR.
- ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
+
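    // Illustrative note (not part of this patch): for "vselgt.f32 s0, s1, s2"
    // the GT condition is baked into the opcode rather than carried as a
    // predicate operand, which is why it must be special-cased here and why
    // the operand rewrite below bails out for VSELs instead of updating it.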
if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
@@ -2244,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
- NewCC));
- }
- else
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
+ return false;
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+ }
+ } else
switch (CC) {
default:
// CPSR can be used multiple times, we should continue.
@@ -3604,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 0;
+
+ if (MI->isBundle())
+ return 0;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+    // instructions; this apparently increases their latencies.
+ return 1;
+ }
+ return 0;
+}
+
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -3685,8 +3870,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
- /*FindMin=*/false);
+ int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
if (Latency < 0)
Latency = getInstrLatency(ItinData, DefMI);
if (Latency <= 3)
@@ -4137,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
// instruction is micro-coded with 2 uops, so don't do this until we can
- // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
// too big regressions.
// Insert the dependency-breaking FCONSTD before MI.
@@ -4152,6 +4336,8 @@ bool ARMBaseInstrInfo::hasNOP() const {
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ if (MI->getNumOperands() < 4)
+ return true;
unsigned ShOpVal = MI->getOperand(3).getImm();
unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
// Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ef659c23bd6..93e59647d220 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -46,7 +46,7 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+ virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
@@ -125,12 +125,6 @@ public:
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
- virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx,
- uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const;
-
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
@@ -270,6 +264,8 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
+ unsigned getPredicationCost(const MachineInstr *MI) const;
+
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
@@ -366,6 +362,17 @@ bool isIndirectBranchOpcode(int Opc) {
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
+static inline bool isPopOpcode(int Opc) {
+ return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
+ Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
+ Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD;
+}
+
+static inline bool isPushOpcode(int Opc) {
+ return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD ||
+ Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -405,6 +412,13 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
const ARMBaseRegisterInfo& MRI,
unsigned MIFlags = 0);
+/// Tries to add registers to the reglist of a given base-updating
+/// push/pop instruction to adjust the stack by an additional
+/// NumBytes. This can save a few bytes per function in code-size, but
+/// obviously generates more memory traffic. As such, it only takes
+/// effect in functions being optimised for size.
+bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI,
+ unsigned NumBytes);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b0d34a76b014..8717dc0cde90 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -43,46 +43,73 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- if (ghcCall) {
- return CSR_GHC_SaveList;
- }
- else {
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList
+ : CSR_AAPCS_SaveList;
+
+ if (!MF) return RegList;
+
+ const Function *F = MF->getFunction();
+ if (F->getCallingConv() == CallingConv::GHC) {
+ // GHC set of callee saved regs is empty as all those regs are
+ // used for passing STG regs around
+ return CSR_NoRegs_SaveList;
+ } else if (F->hasFnAttribute("interrupt")) {
+ if (STI.isMClass()) {
+      // M-class CPUs have hardware which saves the registers needed to allow a
+      // function conforming to the AAPCS to serve as a handler.
+ return CSR_AAPCS_SaveList;
+ } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
+      // Fast interrupt mode gives the handler a private copy of R8-R14, so
+      // fewer registers need to be saved to restore user-mode state.
+ return CSR_FIQ_SaveList;
+ } else {
+ // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
+ // exception handling.
+ return CSR_GenericInt_SaveList;
+ }
}
+
+ return RegList;
}
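// Sketch of the IR this logic keys off (an assumption about usage, not part
// of this patch):
//   define void @fiq_handler() #0 { ... }
//   attributes #0 = { "interrupt"="FIQ" }   ; selects CSR_FIQ_SaveList above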
const uint32_t*
-ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::GHC)
+    // This is academic because all GHC calls are (supposed to be) tail calls
+ return CSR_NoRegs_RegMask;
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
const uint32_t*
-ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+ARMBaseRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
}
const uint32_t*
-ARMBaseRegisterInfo::getNoPreservedMask() const {
- return CSR_NoRegs_RegMask;
+ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+ // This should return a register mask that is the same as that returned by
+ // getCallPreservedMask but that additionally preserves the register used for
+ // the first i32 argument (which must also be the register used to return a
+ // single i32 return value)
+ //
+ // In case that the calling convention does not use the same register for
+ // both or otherwise does not want to enable this optimization, the function
+ // should return NULL
+ if (CC == CallingConv::GHC)
+    // This is academic because all GHC calls are (supposed to be) tail calls
+ return NULL;
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
}
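// Sketch of the 'returned' case this mask models (illustrative, not part of
// this patch):
//   declare i8* @setter(i8* returned %this, i32 %v)
// Callers may then keep using %this in r0 across the call without a reload.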
BitVector ARMBaseRegisterInfo::
@@ -94,6 +121,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
+ Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
Reserved.set(FramePtr);
if (hasBasePointer(MF))
@@ -309,7 +337,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- if (!MF.getTarget().Options.RealignStack)
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
return false;
if (AFI->isThumb1OnlyFunction())
return false;
@@ -357,14 +385,6 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return ARM::SP;
}
-unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
@@ -375,6 +395,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred,
unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
@@ -556,9 +577,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
- const MCInstrDesc &MCID = TII.get(ADDriOpc);
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -574,6 +596,8 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
MachineInstr &MI = *I;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -671,6 +695,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
const ARMFrameLowering *TFI =
static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -696,12 +722,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
#endif // NDEBUG
- // Special handling of dbg_value instructions.
- if (MI.isDebugValue()) {
- MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
- return;
- }
+ assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code");
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 0679919152c0..e28fff68f4e2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -72,9 +72,16 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
}
}
+static inline bool isCalleeSavedRegister(unsigned Reg,
+ const MCPhysReg *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
/// FramePtr - ARM physical register used as frame ptr.
@@ -86,8 +93,7 @@ protected:
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
// Return the opcode that implements 'Op', or 0 if no opcode
unsigned getOpcode(int Op) const;
@@ -96,9 +102,18 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
const uint32_t *getNoPreservedMask() const;
+ /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
+ /// case that 'returned' is on an i32 first argument if the calling convention
+ /// is one that can (partially) model this attribute with a preserved mask
+ /// (i.e. it is a calling convention that uses the same register for the first
+ /// i32 argument and an i32 return value)
+ ///
+ /// Should return NULL in the case that the calling convention does not have
+ /// this property
+ const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass*
@@ -142,10 +157,6 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getBaseRegister() const { return BasePtr; }
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
bool isLowRegister(unsigned Reg) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
index 11bd6a4a8dbc..b16d4ef54b6d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -15,11 +15,13 @@
#ifndef __TARGET_ARMBUILDATTRS_H__
#define __TARGET_ARMBUILDATTRS_H__
+namespace llvm {
namespace ARMBuildAttrs {
+
enum SpecialAttr {
// This is for the .cpu asm attr. It translates into one or more
// AttrType (below) entries in the .ARM.attributes section in the ELF.
- SEL_CPU
+ SEL_CPU
};
enum AttrType {
@@ -57,7 +59,7 @@ namespace ARMBuildAttrs {
ABI_FP_optimization_goals = 31,
compatibility = 32,
CPU_unaligned_access = 34,
- VFP_HP_extension = 36,
+ FP_HP_extension = 36,
ABI_FP_16bit_format = 38,
MPextension_use = 42, // was 70, 2.08 ABI
DIV_use = 44,
@@ -89,10 +91,11 @@ namespace ARMBuildAttrs {
v7 = 10, // e.g. Cortex A8, Cortex M3
v6_M = 11, // e.g. Cortex M1
v6S_M = 12, // v6_M with the System extensions
- v7E_M = 13 // v7_M with DSP extensions
+ v7E_M = 13, // v7_M with DSP extensions
+ v8 = 14 // v8, AArch32
};
- enum CPUArchProfile { // (=7), uleb128
+ enum CPUArchProfile { // (=7), uleb128
Not_Applicable = 0, // pre v7, or cross-profile code
ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
@@ -101,31 +104,67 @@ namespace ARMBuildAttrs {
};
// The following have a lot of common use cases
- enum {
- //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ enum {
Not_Allowed = 0,
Allowed = 1,
- // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ // Tag_ARM_ISA_use (=8), uleb128
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
- AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
- AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+ AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted
+ AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31
// Tag_WMMX_arch, (=11), uleb128
- AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
-
- // Tag_WMMX_arch, (=11), uleb128
- AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+ AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1
+ AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_Advanced_SIMD_arch, (=12), uleb128
+ AllowNeon = 1, // SIMDv1 was permitted
+ AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations)
+ AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
- // Tag_ABI_FP_denormal, (=20), uleb128
+ // Tag_ABI_FP_denormal, (=20), uleb128
PreserveFPSign = 2, // sign when flushed-to-zero is preserved
// Tag_ABI_FP_number_model, (=23), uleb128
AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
- AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings
+
+ // Tag_ABI_HardFP_use, (=27), uleb128
+ HardFPImplied = 0, // FP use should be implied by Tag_FP_arch
+ HardFPSinglePrecision = 1, // Single-precision only
+
+ // Tag_ABI_VFP_args, (=28), uleb128
+ BaseAAPCS = 0,
+ HardFPAAPCS = 1,
+
+ // Tag_FP_HP_extension, (=36), uleb128
+ AllowHPFP = 1, // Allow use of Half Precision FP
+
+ // Tag_MPextension_use, (=42), uleb128
+ AllowMP = 1, // Allow use of MP extensions
+
+ // Tag_DIV_use, (=44), uleb128
+    AllowDIVIfExists = 0, // Allow hardware divide if available in the arch, or if no info exists.
+ DisallowDIV = 1, // Hardware divide explicitly disallowed
+ AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above
+ // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile.
+
+ // Tag_Virtualization_use, (=68), uleb128
+ AllowTZ = 1,
+ AllowVirtualization = 2,
+ AllowTZVirtualization = 3
};
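  // Illustrative note (not part of this patch): these values ultimately show
  // up as ELF build-attribute directives in assembly, e.g. the new v8 FP ISA
  // value would be emitted as
  //   .eabi_attribute 10, 7   @ Tag_FP_arch = AllowFPARMv8A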
-}
+
+} // namespace ARMBuildAttrs
+} // namespace llvm
#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 8ff666ed2844..9bea4b2d68e9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -207,10 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
- (sub CSR_AAPCS_ThisReturn, R9))>;
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
+// The "interrupt" attribute is used to generate code that is acceptable in
+// exception-handlers of various kinds. It makes us use a different return
+// instruction (handled elsewhere) and affects which registers we must return to
+// our "caller" in the same state as we receive them.
+
+// For most interrupts, all registers except SP and LR are shared with
+// user-space. We mark LR to be saved anyway, since this is what the ARM backend
+// generally does rather than tracking its liveness as a normal register.
+def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
+
+// The fast interrupt handlers have more private state and get their own copies
+// of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
+
+// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and
+// current frame lowering expects to encounter it while processing callee-saved
+// registers.
+def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;
+
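// Illustrative note (not part of this patch): at the C level a handler that
// would select CSR_FIQ is typically written as
//   __attribute__((interrupt("FIQ"))) void fiq_handler(void);
// which clang lowers to the "interrupt"="FIQ" function attribute.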
-// GHC set of callee saved regs is empty as all those regs are
-// used for passing STG regs around
-// add is a workaround for not being able to compile empty list:
-// def CSR_GHC : CalleeSavedRegs<()>;
-def CSR_GHC : CalleeSavedRegs<(add)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
index 5e8e1739a984..568ca858c4d2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -167,6 +167,8 @@ namespace {
const { return 0; }
unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
const { return 0; }
+ unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
const { return 0; }
unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
@@ -1044,8 +1046,8 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
return;
} else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
uint32_t v = ~MI.getOperand(2).getImm();
- int32_t lsb = CountTrailingZeros_32(v);
- int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ int32_t lsb = countTrailingZeros(v);
+ int32_t msb = (32 - countLeadingZeros(v)) - 1;
// Instr{20-16} = msb, Instr{11-7} = lsb
Binary |= (msb & 0x1F) << 16;
Binary |= (lsb & 0x1F) << 7;
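    // Worked example (illustrative, not part of this patch): if the BFI
    // immediate encodes the inverted mask 0xFF00FFFF, then v = 0x00FF0000,
    // giving lsb = countTrailingZeros(v) = 16 and msb = 32 - 8 - 1 = 23.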
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 4891609b336f..cff5ce27bca6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -128,7 +128,7 @@ namespace {
// If the block size isn't a multiple of the known bits, assume the
// worst case padding.
if (Size & ((1u << Bits) - 1))
- Bits = CountTrailingZeros_32(Size);
+ Bits = countTrailingZeros(Size);
return Bits;
}
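// Worked example (illustrative, not part of this patch): with Bits = 3
// (8-byte alignment known) and Size = 20 (0b10100), Size & 7 != 0, so the
// guaranteed alignment drops to countTrailingZeros(20) = 2 bits (4 bytes).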
@@ -753,6 +753,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 4;
break;
+ case ARM::LDRBi12:
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 4e703ec3c1a8..7d41c69f08b8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
- if (!APC) continue;
- if (APC->CVal == CVal && equals(APC))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
}
bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
- if (!APS) continue;
-
- if (APS->S == S && equals(APS))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment);
}
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
- if (!APMBB) continue;
-
- if (APMBB->MBB == MBB && equals(APMBB))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
}
bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 93812fe6bb37..7ae7bf46f19d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
@@ -64,6 +65,26 @@ protected:
ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
+
+ template <typename Derived>
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (Derived *APC = dyn_cast<Derived>(CPV))
+ if (cast<Derived>(this)->equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+ }
+
public:
virtual ~ARMConstantPoolValue();
@@ -156,6 +177,10 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
+
+ bool equals(const ARMConstantPoolConstant *A) const {
+ return CVal == A->CVal && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -187,6 +212,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
+
+ bool equals(const ARMConstantPoolSymbol *A) const {
+ return S == A->S && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -219,6 +248,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
+
+ bool equals(const ARMConstantPoolMBB *A) const {
+ return MBB == A->MBB && ARMConstantPoolValue::equals(A);
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index beb843ca9aa8..e6f7f86c5587 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -692,10 +692,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -705,10 +704,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
@@ -717,39 +715,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm())
.addImm(MI.getOperand(4).getImm()) // 'pred'
- .addReg(MI.getOperand(5).getReg())
+ .addOperand(MI.getOperand(5))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
-
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
- .addReg(MI.getOperand(3).getReg(),
- getKillRegState(MI.getOperand(3).isKill()))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(3))
.addImm(MI.getOperand(4).getImm())
.addImm(MI.getOperand(5).getImm()) // 'pred'
- .addReg(MI.getOperand(6).getReg())
+ .addOperand(MI.getOperand(6))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCCi16:
case ARM::MOVCCi16: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
-
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -760,23 +755,47 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MVNCCi:
case ARM::MVNCCi: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCClsl:
+ case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr:
+ case ARM::t2MOVCCror: {
+ unsigned NewOpc;
+ switch (Opcode) {
+ case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
+ case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
+ case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
+ case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
+ default: llvm_unreachable("unexpeced conditional move");
+ }
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addOperand(MI.getOperand(5))
+ .addReg(0); // 's' bit
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@@ -823,7 +842,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
- // These are just fancy MOVs insructions.
+ // These are just fancy MOVs instructions.
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
@@ -938,6 +957,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
+ case ARM::SUBS_PC_LR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
+ .addReg(ARM::LR)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2))
+ .addReg(ARM::CPSR, RegState::Undef);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
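  // Illustrative note (not part of this patch): SUBS_PC_LR is the pseudo
  // behind exception returns such as "subs pc, lr, #4", which also restores
  // CPSR from SPSR; hence the explicit CPSR operand on the expansion.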
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.def b/contrib/llvm/lib/Target/ARM/ARMFPUName.def
new file mode 100644
index 000000000000..9a1bbe703d99
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.def
@@ -0,0 +1,32 @@
+//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the list of the supported ARM FPU names.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_FPU_NAME
+#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.h"
+#endif
+
+ARM_FPU_NAME("vfp", VFP)
+ARM_FPU_NAME("vfpv2", VFPV2)
+ARM_FPU_NAME("vfpv3", VFPV3)
+ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
+ARM_FPU_NAME("vfpv4", VFPV4)
+ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
+ARM_FPU_NAME("fp-armv8", FP_ARMV8)
+ARM_FPU_NAME("neon", NEON)
+ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
+ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8)
+ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8)
+
+#undef ARM_FPU_NAME
diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.h b/contrib/llvm/lib/Target/ARM/ARMFPUName.h
new file mode 100644
index 000000000000..2a64cce4880d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.h
@@ -0,0 +1,26 @@
+//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMFPUNAME_H
+#define ARMFPUNAME_H
+
+namespace llvm {
+namespace ARM {
+
+enum FPUKind {
+ INVALID_FPU = 0
+
+#define ARM_FPU_NAME(NAME, ID) , ID
+#include "ARMFPUName.def"
+};
+
+} // namespace ARM
+} // namespace llvm
+
+#endif // ARMFPUNAME_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 5d45f6491240..a4004f32db37 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -20,6 +20,7 @@
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -175,6 +176,8 @@ class ARMFastISel : public FastISel {
// Utility routines.
private:
+ unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum,
+ unsigned Op);
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -251,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
- // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
- return false;
+ return MI->isPredicable();
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
if (MCID.OpInfo[i].isPredicate())
@@ -275,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Do we use a predicate? or...
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
- if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ if (isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
@@ -290,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
+unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
+ unsigned Op, unsigned OpNum) {
+ if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
@@ -305,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
@@ -325,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -348,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+  Op2 = constrainOperandRegClass(II, Op2, 3);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -372,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -394,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -417,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -609,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
.addConstantPoolIndex(Idx));
else
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::LDRcp), DestReg)
.addConstantPoolIndex(Idx)
@@ -628,6 +673,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
+ // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool IsThreadLocal = GVar && GVar->isThreadLocal();
+ if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0;
+
// Use movw+movt when possible, it avoids constant pool entries.
// Darwin targets don't support movt with Reloc::Static, see
// ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
@@ -679,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
AddOptionalDefs(MIB);
} else {
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
@@ -814,22 +865,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return ARMComputeAddress(U->getOperand(0), Addr);
- }
- case Instruction::IntToPtr: {
+ case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return ARMComputeAddress(U->getOperand(0), Addr);
break;
- }
- case Instruction::PtrToInt: {
+ case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return ARMComputeAddress(U->getOperand(0), Addr);
break;
- }
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
int TmpOffset = Addr.Offset;
@@ -852,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
@@ -1025,7 +1068,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
useAM3 = true;
}
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i16:
if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
@@ -1040,7 +1083,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
useAM3 = true;
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i32:
if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
@@ -1054,7 +1097,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
} else {
Opc = ARM::LDRi12;
}
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
@@ -1063,7 +1106,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
- RC = &ARM::GPRRegClass;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
} else {
Opc = ARM::VLDRS;
RC = TLI.getRegClassFor(VT);
@@ -1136,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
@@ -1207,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
+ SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
.addReg(SrcReg);
@@ -1330,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
+ OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
.addReg(OpReg).addImm(1));
@@ -1367,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1491,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
}
}
+ const MCInstrDesc &II = TII.get(CmpOpc);
+ SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
if (!UseImm) {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
+ SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1).addReg(SrcReg2));
} else {
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1);
// Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
@@ -1696,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
}
unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
+ CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
.addReg(CondReg).addImm(0));
@@ -1712,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
}
unsigned ResultReg = createResultReg(RC);
- if (!UseImm)
+ if (!UseImm) {
+ Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
- else
+ } else {
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1802,7 +1856,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
unsigned SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
+ SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
@@ -1930,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
!VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
return false;
} else {
- switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ switch (ArgVT.SimpleTy) {
default:
return false;
case MVT::i1:
@@ -1985,7 +2041,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
- assert (Arg != 0 && "Failed to emit a sext");
+ assert (Arg != 0 && "Failed to emit a zext");
ArgVT = DestVT;
break;
}
@@ -2182,10 +2238,14 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
}
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+ // Manually compute the global's type to avoid building it when unnecessary.
+ Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
+ EVT LCREVT = TLI.getValueType(GVTy);
+ if (!LCREVT.isSimple()) return 0;
+
GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
GlobalValue::ExternalLinkage, 0, Name);
- EVT LCREVT = TLI.getValueType(GV->getType());
- if (!LCREVT.isSimple()) return 0;
+ assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
@@ -2403,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
+ unsigned char OpFlags = 0;
+
+ // Add MO_PLT for global address or external symbol in the PIC relocation
+ // model.
+ if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
AddDefaultPred(MIB);
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
+ MIB.addGlobalAddress(GV, 0, OpFlags);
else
- MIB.addExternalSymbol(IntrMemName, 0);
+ MIB.addExternalSymbol(IntrMemName, OpFlags);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
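// Sketched effect of the new flag (assembly is illustrative; what really
// changes is the relocation the asm printer requests):
//
//   @ OpFlags == 0:               bl  memcpy
//   @ OpFlags == ARMII::MO_PLT:   bl  memcpy(PLT)
//
// Routing calls through the PLT keeps FastISel-generated code legal in
// position-independent ELF objects, mirroring the SelectionDAG path for
// the same configuration.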
@@ -2602,47 +2669,136 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
+ if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+ return 0;
- unsigned Opc;
- bool isBoolZext = false;
- const TargetRegisterClass *RC;
- switch (SrcVT.SimpleTy) {
- default: return 0;
- case MVT::i16:
- if (!Subtarget->hasV6Ops()) return 0;
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
- if (isZExt)
- Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
- else
- Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
- break;
- case MVT::i8:
- if (!Subtarget->hasV6Ops()) return 0;
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
- if (isZExt)
- Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
- else
- Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
- break;
- case MVT::i1:
- if (isZExt) {
- RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
- Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
- isBoolZext = true;
- break;
+ // Table of which combinations can be emitted as a single instruction,
+ // and which will require two.
+ static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+ // ARM Thumb
+ // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
+ // ext: s z s z s z s z
+ /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+ /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+ /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+ };
+
+  // Target registers:
+  //  - For ARM, the target can never be PC.
+  //  - For 16-bit Thumb, it is restricted to the lower 8 registers.
+  //  - For 32-bit Thumb, it is restricted to non-SP and non-PC.
+ static const TargetRegisterClass *RCTbl[2][2] = {
+ // Instructions: Two Single
+ /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+ /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
+ };
+
+ // Table governing the instruction(s) to be emitted.
+ static const struct InstructionTable {
+ uint32_t Opc : 16;
+    uint32_t hasS : 1; // Some instructions have an S bit; always set it to 0.
+ uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.
+ uint32_t Imm : 8; // All instructions have either a shift or a mask.
+ } IT[2][2][3][2] = {
+ { // Two instructions (first is left shift, second is in this table).
+ { // ARM Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },
+ /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },
+ /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },
+ /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },
+ /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },
+ /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }
+ },
+ { // Thumb Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
+ /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
+ /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
+ /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
+ /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
+ /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
+ }
+ },
+ { // Single instruction.
+ { // ARM Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
+ /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },
+ /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },
+ /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },
+ /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },
+ /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }
+ },
+ { // Thumb Opc S Shift Imm
+ /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
+ /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },
+ /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },
+ /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
+ /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },
+ /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }
+ }
}
- return 0;
+ };
+
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ unsigned DestBits = DestVT.getSizeInBits();
+ (void) DestBits;
+ assert((SrcBits < DestBits) && "can only extend to larger types");
+ assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+ "other sizes unimplemented");
+ assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+ "other sizes unimplemented");
+
+ bool hasV6Ops = Subtarget->hasV6Ops();
+ unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}
+ assert((Bitness < 3) && "sanity-check table bounds");
+
+ bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+ const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+ const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
+ unsigned Opc = ITP->Opc;
+ assert(ARM::KILL != Opc && "Invalid table entry");
+ unsigned hasS = ITP->hasS;
+ ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
+ assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
+ "only MOVsi has shift operand addressing mode");
+ unsigned Imm = ITP->Imm;
+
+ // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+ bool setsCPSR = &ARM::tGPRRegClass == RC;
+ unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
+ unsigned ResultReg;
+  // MOVsi encodes the shift and the immediate in its shift-operand
+  // addressing mode. The condition below also holds for the two-instruction
+  // sequences, since both instructions emitted there are shifts.
+ bool ImmIsSO = (Shift != ARM_AM::no_shift);
+
+ // Either one or two instructions are emitted.
+ // They're always of the form:
+ // dst = in OP imm
+ // CPSR is set only by 16-bit Thumb instructions.
+ // Predicate, if any, is AL.
+ // S bit, if available, is always 0.
+  // When two are emitted, the first's result feeds the second as its input;
+  // the intermediate value is then dead.
+ unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+ for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+ ResultReg = createResultReg(RC);
+ bool isLsl = (0 == Instr) && !isSingleInstr;
+ unsigned Opcode = isLsl ? LSLOpc : Opc;
+ ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
+ unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
+ bool isKill = 1 == Instr;
+ MachineInstrBuilder MIB = BuildMI(
+ *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
+ if (setsCPSR)
+ MIB.addReg(ARM::CPSR, RegState::Define);
+ SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
+ AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+ if (hasS)
+ AddDefaultCC(MIB);
+    // The second instruction consumes the first's result.
+ SrcReg = ResultReg;
}
- unsigned ResultReg = createResultReg(RC);
- MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- if (isBoolZext)
- MIB.addImm(1);
- else
- MIB.addImm(0);
- AddOptionalDefs(MIB);
return ResultReg;
}
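// A worked example of the tables above (assembly is illustrative).
// Sign-extending an i8 on pre-v6 ARM, where SXTB does not exist, takes
// the two-instruction path:
//
//   mov r1, r0, lsl #24    @ LSLOpc: MOVsi with an lsl #24 shift operand
//   mov r2, r1, asr #24    @ Opc:    MOVsi with an asr #24 shift operand
//
// whereas v6+ hits the single-instruction row:
//
//   sxtb r1, r0
//
// The i1 zero-extend is the one mask-shaped entry (an AND with #1),
// which is why each row carries both a Shift and an Imm: for shifts the
// Imm is the shift amount folded into the shift-operand encoding, for
// the ANDs it is the mask itself.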
@@ -2707,7 +2863,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
if (Reg2 == 0) return false;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2797,6 +2953,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+ uint16_t Opc[2]; // ARM, Thumb.
+ uint8_t ExpectedImm;
+ uint8_t isZExt : 1;
+ uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+ { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 },
+ { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 },
+ { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },
+ { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
+ { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
+};
+}
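// How a row is matched, traced on the uxtb case from the comment below:
// a Thumb2 zero-extend of an i8 load arrives as ARM::t2UXTB whose
// immediate operand (the rotation) is 0, so the search loop selects
//
//   { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
//
// via Opc[isThumb2], ExpectedImm and ExpectedVT, reporting isZExt = 1.
// The ANDri rows cover the case where an i8 zero-extend was emitted as
// an AND instead; there the immediate must be the 255 mask to qualify.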
+
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
@@ -2812,26 +2987,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// ldrb r1, [r0] ldrb r1, [r0]
// uxtb r2, r1 =>
// mov r3, r2 mov r3, r1
- bool isZExt = true;
- switch(MI->getOpcode()) {
- default: return false;
- case ARM::SXTH:
- case ARM::t2SXTH:
- isZExt = false;
- case ARM::UXTH:
- case ARM::t2UXTH:
- if (VT != MVT::i16)
- return false;
- break;
- case ARM::SXTB:
- case ARM::t2SXTB:
- isZExt = false;
- case ARM::UXTB:
- case ARM::t2UXTB:
- if (VT != MVT::i8)
- return false;
- break;
+ if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+ return false;
+ const uint64_t Imm = MI->getOperand(2).getImm();
+
+ bool Found = false;
+ bool isZExt;
+ for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+ i != e; ++i) {
+ if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+ (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+ MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+ Found = true;
+ isZExt = FoldableLoadExtends[i].isZExt;
+ }
}
+ if (!Found) return false;
+
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
@@ -2854,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
// Load value.
if (isThumb2) {
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg1)
.addConstantPoolIndex(Idx));
Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
} else {
// The extra immediate is for addrmode2.
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(ARM::LDRcp), DestReg1)
.addConstantPoolIndex(Idx).addImm(0));
@@ -2873,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
}
unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
+ DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
+ GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc), DestReg2)
.addReg(DestReg1)
@@ -2938,12 +3115,10 @@ bool ARMFastISel::FastLowerArguments() {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = &ARM::rGPRRegClass;
Idx = 0;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++Idx) {
- if (I->use_empty())
- continue;
unsigned SrcReg = GPRArgRegs[Idx];
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
@@ -2961,13 +3136,23 @@ bool ARMFastISel::FastLowerArguments() {
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
- // Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
- // Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
+ // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
+ bool UseFastISel = false;
+ UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only();
+ UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
+ UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
+
+ if (UseFastISel) {
+    // iOS always has a FP for backtracking; force other targets
+    // to keep their FP when doing FastISel. The emitted code is
+ // currently superior, and in cases like test-suite's lencod
+ // FastISel isn't quite correct when FP is eliminated.
+ TM.Options.NoFramePointerElim = true;
return new ARMFastISel(funcInfo, libInfo);
+ }
return 0;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
new file mode 100644
index 000000000000..dafc4b3a82bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
@@ -0,0 +1,93 @@
+//===-- ARMFeatures.h - Checks for ARM instruction features ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code shared between ARM CodeGen and ARM MC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_FEATURES_H
+#define TARGET_ARM_FEATURES_H
+
+#include "ARM.h"
+
+namespace llvm {
+
+template<typename InstrType> // could be MachineInstr or MCInst
+inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) {
+ switch (Instr->getOpcode()) {
+ default:
+ return false;
+ case ARM::tADC:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tADDrSPi:
+ case ARM::tADDrr:
+ case ARM::tAND:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tBIC:
+ case ARM::tCMNz:
+ case ARM::tCMPi8:
+ case ARM::tCMPr:
+ case ARM::tEOR:
+ case ARM::tLDRBi:
+ case ARM::tLDRBr:
+ case ARM::tLDRHi:
+ case ARM::tLDRHr:
+ case ARM::tLDRSB:
+ case ARM::tLDRSH:
+ case ARM::tLDRi:
+ case ARM::tLDRr:
+ case ARM::tLDRspi:
+ case ARM::tLSLri:
+ case ARM::tLSLrr:
+ case ARM::tLSRri:
+ case ARM::tLSRrr:
+ case ARM::tMOVi8:
+ case ARM::tMUL:
+ case ARM::tMVN:
+ case ARM::tORR:
+ case ARM::tROR:
+ case ARM::tRSB:
+ case ARM::tSBC:
+ case ARM::tSTRBi:
+ case ARM::tSTRBr:
+ case ARM::tSTRHi:
+ case ARM::tSTRHr:
+ case ARM::tSTRi:
+ case ARM::tSTRr:
+ case ARM::tSTRspi:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tSUBrr:
+ case ARM::tTST:
+ return true;
+  // There are some "conditionally deprecated" opcodes.
+ case ARM::tADDspr:
+ return Instr->getOperand(2).getReg() != ARM::PC;
+  // ADD PC, SP and BLX PC were always unpredictable;
+  // now, on top of that, they're deprecated.
+ case ARM::tADDrSP:
+ case ARM::tBX:
+ return Instr->getOperand(0).getReg() != ARM::PC;
+ case ARM::tBLXr:
+ return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC;
+ case ARM::tADDhirr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(2).getReg() != ARM::PC;
+ case ARM::tCMPhir:
+ case ARM::tMOVr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(1).getReg() != ARM::PC;
+ }
+}
+
+}
+
+#endif
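// A minimal usage sketch (the setup around it is assumed, not part of
// this header). The template parameter is what lets the MC layer and
// CodeGen share a single predicate:
//
//   #include "ARMFeatures.h"
//
//   bool eligibleForIT(llvm::MCInst &Inst) {
//     // For tBLXr at this level the register operand is assumed to sit
//     // after the two predicate operands, hence index 2; the index is a
//     // parameter precisely because it depends on the caller's operand
//     // layout.
//     return llvm::isV8EligibleForIT(&Inst, /*BLXOperandIndex=*/2);
//   }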
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 483802b130b5..d32bdbc58939 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -82,22 +82,11 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
- if (MI->getOpcode() == ARM::LDMIA_RET ||
- MI->getOpcode() == ARM::t2LDMIA_RET ||
- MI->getOpcode() == ARM::LDMIA_UPD ||
- MI->getOpcode() == ARM::t2LDMIA_UPD ||
- MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ if (isPopOpcode(MI->getOpcode())) {
// The first two operands are predicates. The last two are
// imp-def and imp-use of SP. Check everything in between.
for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
@@ -115,20 +104,31 @@ static bool isCSRestore(MachineInstr *MI,
return false;
}
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, unsigned DestReg,
+ unsigned SrcReg, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
}
+static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
+ emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
+ MIFlags, Pred, PredReg);
+}
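// After this refactor emitSPUpdate is a thin wrapper; for instance the
// prologue's
//
//   emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
//
// is equivalent to
//
//   emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII,
//                        ARM::SP, ARM::SP, -NumBytes);
//
// which is what lets the frame-pointer setup below reuse the same helper
// with FramePtr and ARM::SP as the destination/source pair.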
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -174,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = CSI[i].getReg();
int FI = CSI[i].getFrameIdx();
switch (Reg) {
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -181,73 +186,61 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R11:
+ case ARM::R12:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
- AFI->addDPRCalleeSavedAreaFrame(FI);
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
- }
}
}
// Move past area 1.
- if (GPRCS1Size > 0) MBBI++;
-
- // Set FP to point to the stack slot that contains the previous FP.
- // For iOS, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not iOS, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it is
- // now safe to emit this assignment.
- bool HasFP = hasFP(MF);
- if (HasFP) {
- unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- AddDefaultCC(AddDefaultPred(MIB));
- }
-
- // Move past area 2.
- if (GPRCS2Size > 0) MBBI++;
+ MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush;
+ if (GPRCS1Size > 0)
+ FramePtrPush = LastPush = MBBI++;
// Determine starting offsets of spill areas.
+ bool HasFP = hasFP(MF);
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (HasFP)
+ int FramePtrOffsetInPush = 0;
+ if (HasFP) {
+ FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ }
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ // Move past area 2.
+ if (GPRCS2Size > 0) {
+ LastPush = MBBI++;
+ }
+
// Move past area 3.
if (DPRCSSize > 0) {
- MBBI++;
+ LastPush = MBBI++;
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
- MBBI++;
+ LastPush = MBBI++;
}
// Move past the aligned DPRCS2 area.
@@ -263,8 +256,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
- MachineInstr::FrameSetup);
+ if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) {
+ if (LastPush == FramePtrPush)
+ FramePtrOffsetInPush += NumBytes;
+ } else
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
// Note it's not safe to do this in Thumb2 mode because it would have
@@ -277,6 +275,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP)
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII,
+ FramePtr, ARM::SP, FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
@@ -357,7 +367,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -371,11 +382,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
- do
+ do {
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
if (!isCSRestore(MBBI, TII, CSRegs))
++MBBI;
}
@@ -419,8 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
}
- } else if (NumBytes)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
if (AFI->getDPRCalleeSavedAreaSize()) {
@@ -499,12 +510,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
FrameReg = ARM::SP;
Offset += SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- return Offset - AFI->getDPRCalleeSavedAreaOffset();
// SP can move around if there are allocas. We may also lose track of SP
// when emergency spilling inside a non-reserved call frame setup.
@@ -656,6 +661,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
RetOpcode == ARM::TCRETURNri);
+ bool isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -670,7 +677,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
+ STI.hasV5TOps()) {
Reg = ARM::PC;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
// Fold the return instruction into the LDM.
@@ -1197,7 +1205,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -1224,6 +1232,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
case ARM::LR:
LRSpilled = true;
// Fallthrough
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
CS1Spilled = true;
@@ -1238,6 +1248,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
switch (Reg) {
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
case ARM::LR:
@@ -1293,8 +1305,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (!LRSpilled && CS1Spilled) {
MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
- UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
- UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ SmallVectorImpl<unsigned>::iterator LRPos;
+ LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ (unsigned)ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+
ForceLRSpill = false;
ExtraCSSpill = true;
}
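// The LRPos change above is the standard guarded-erase idiom, distilled
// here with placeholder names (Vec, Val):
//
//   SmallVectorImpl<unsigned>::iterator It =
//       std::find(Vec.begin(), Vec.end(), Val);
//   if (It != Vec.end())
//     Vec.erase(It);
//
// The old code handed std::find's result straight to erase(); erasing
// end() when LR is not in UnspilledCS1GPRs is undefined behavior.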
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 1240169e84ed..c69d313fd9ce 100644
--- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -44,10 +44,16 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
+ const TargetMachine &TM =
+ MI->getParent()->getParent()->getTarget();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+ !(TII.getSubtarget().isLikeA9() &&
+ (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -58,7 +64,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
(TII.canCauseFpMLxStall(MI->getOpcode()) ||
- hasRAWHazard(DefMI, MI, TRI))) {
+ hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) {
// Try to schedule another instruction for the next 4 cycles.
if (FpMLxStalls == 0)
FpMLxStalls = 4;
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index 98bfc4cf0cc5..e1dcec3d1cc8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -28,21 +28,14 @@ class MachineInstr;
/// ARM preRA scheduler uses an unspecialized instance of the
/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
- const ARMBaseInstrInfo &TII;
- const ARMBaseRegisterInfo &TRI;
- const ARMSubtarget &STI;
-
MachineInstr *LastMI;
unsigned FpMLxStalls;
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
- const ARMBaseInstrInfo &tii,
- const ARMBaseRegisterInfo &tri,
- const ARMSubtarget &sti,
- const ScheduleDAG *DAG) :
- ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0) {}
+ const ScheduleDAG *DAG)
+ : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
+ LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5e88e95d6162..87d15226947a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -61,7 +61,6 @@ enum AddrMode2Type {
class ARMDAGToDAGISel : public SelectionDAGISel {
ARMBaseTargetMachine &TM;
- const ARMBaseInstrInfo *TII;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -71,7 +70,6 @@ public:
explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), TM(tm),
- TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
@@ -132,6 +130,13 @@ public:
return true;
}
+ bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+ return true;
+ }
+
bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
@@ -177,6 +182,7 @@ public:
SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
inline bool is_so_imm(unsigned Imm) const {
return ARM_AM::getSOImmVal(Imm) != -1;
@@ -240,21 +246,6 @@ private:
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
- /// SelectCMOVOp - Select CMOV instructions for ARM.
- SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
-
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
@@ -262,7 +253,7 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -364,7 +355,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
continue;
// Check if the AND mask is an immediate of the form: 000.....1111111100
- unsigned TZ = CountTrailingZeros_32(And_imm);
+ unsigned TZ = countTrailingZeros(And_imm);
if (TZ != 1 && TZ != 2)
// Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, a left-shift operand of 1 or 2 is free, but others are not.
@@ -402,12 +393,12 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
}
// Now make the transformation.
- Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
Srl.getOperand(0),
CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
- N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
Srl, CurDAG->getConstant(And_imm, MVT::i32));
- N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
}
@@ -423,7 +414,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (!CheckVMLxHazard)
return true;
- if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() &&
!Subtarget->isSwift())
return true;
@@ -434,6 +425,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (Use->getOpcode() == ISD::CopyToReg)
return true;
if (Use->isMachineOpcode()) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
return true;
@@ -533,7 +527,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -557,7 +552,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -703,7 +699,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -724,7 +721,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -901,7 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
@@ -915,7 +914,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -960,7 +960,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -978,7 +979,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1202,7 +1204,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1219,7 +1222,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1267,7 +1271,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1297,7 +1302,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1326,7 +1332,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1403,6 +1410,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
return true;
}
+bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+  // This *must* succeed since it's used for the irreplaceable ldrex and strex
+ // instructions.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+
+ if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
+ return true;
+
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHS)
+ return true;
+
+ uint32_t RHSC = (int)RHS->getZExtValue();
+ if (RHSC > 1020 || RHSC % 4 != 0)
+ return true;
+
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
+ }
+
+ OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
+ return true;
+}
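// Worked example of the encoding above: an access at base+32 passes the
// 0..1020, multiple-of-4 check, so OffImm becomes 32 / 4 = 8, the scaled
// immediate the Thumb2 LDREX/STREX forms expect. An access at base+30
// fails the % 4 check, and the function deliberately still returns true
// with Base = N and OffImm = 0, since exclusive accesses have no
// fallback addressing mode.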
+
//===--------------------------------------------------------------------===//
/// getAL - Returns a ARMCC::AL immediate node.
@@ -1468,14 +1503,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
}
}
@@ -1524,7 +1559,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
+ return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
MVT::Other, Ops);
}
@@ -1533,7 +1568,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
@@ -1544,7 +1579,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1555,7 +1590,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1565,7 +1600,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1576,7 +1611,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass =
CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
@@ -1591,7 +1626,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
/// \brief Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
@@ -1605,7 +1640,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
/// \brief Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
@@ -1689,7 +1724,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1821,7 +1856,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -1966,7 +2001,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
@@ -2084,7 +2119,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
unsigned NumVecs,
const uint16_t *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
@@ -2166,7 +2201,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
unsigned Opc) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
unsigned FirstTblReg = IsExt ? 2 : 1;
@@ -2278,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return NULL;
}
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
- unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
- unsigned Opc = 0;
- switch (SOShOp) {
- case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
- case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
- case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
- case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
- default:
- llvm_unreachable("Unknown so_reg opcode!");
- }
- SDValue SOShImm =
- CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- Opc = ARM::t2MOVCCi;
- } else if (TrueImm <= 0xffff) {
- Opc = ARM::t2MOVCCi16;
- } else if (is_t2_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::t2MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
- // Large immediate.
- Opc = ARM::t2MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- bool isSoImm = is_so_imm(TrueImm);
- if (isSoImm) {
- Opc = ARM::MOVCCi;
- } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
- Opc = ARM::MOVCCi16;
- } else if (is_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() &&
- (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
- // Large immediate.
- Opc = ARM::MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
- EVT VT = N->getValueType(0);
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- SDValue CC = N->getOperand(2);
- SDValue CCR = N->getOperand(3);
- SDValue InFlag = N->getOperand(4);
- assert(CC.getOpcode() == ISD::Constant);
- assert(CCR.getOpcode() == ISD::Register);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
-
- if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Pred_so_imm>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 1 size = 0
- //
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 11 size = 0
- //
- // Also VMOVScc and VMOVDcc.
- SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Illegal conditional move type!");
- case MVT::i32:
- Opc = Subtarget->isThumb()
- ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
- : ARM::MOVCCr;
- break;
- case MVT::f32:
- Opc = ARM::VMOVScc;
- break;
- case MVT::f64:
- Opc = ARM::VMOVDcc;
- break;
- }
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2524,27 +2361,40 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+                                       unsigned Op16, unsigned Op32,
+ unsigned Op64) {
+ // Mostly direct translation to the given operations, except that we preserve
+ // the AtomicOrdering for use later on.
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ EVT VT = AN->getMemoryVT();
+
+ unsigned Op;
+ SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
+ if (VT == MVT::i8)
+ Op = Op8;
+ else if (VT == MVT::i16)
+ Op = Op16;
+ else if (VT == MVT::i32)
+ Op = Op32;
+ else if (VT == MVT::i64) {
+ Op = Op64;
+ VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
+ } else
+ llvm_unreachable("Unexpected atomic operation");
+
SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(1)); // Ptr
- Ops.push_back(Node->getOperand(2)); // Low part of Val1
- Ops.push_back(Node->getOperand(3)); // High part of Val1
- if (Opc == ARM::ATOMCMPXCHG6432) {
- Ops.push_back(Node->getOperand(4)); // Low part of Val2
- Ops.push_back(Node->getOperand(5)); // High part of Val2
- }
- Ops.push_back(Node->getOperand(0)); // Chain
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
- MVT::i32, MVT::i32, MVT::Other,
- Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+ Ops.push_back(AN->getOperand(i));
+
+ Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+ Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+ return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}
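// Schematically, the node built above always has the shape
//
//   ATOMIC_OP_Ixx  <original operands minus the leading chain>,
//                  ordering, chain
//
// i.e. the AtomicOrdering is appended as a target constant and the chain
// moves to the end. For the 64-bit forms the result VTList is widened to
// (i32, i32, Other) so the pseudo can return its value as two GPR halves.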
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
@@ -2589,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
- TLI.getPointerTy());
+ getTargetLowering()->getPointerTy());
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
@@ -2619,7 +2469,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI,
+ getTargetLowering()->getPointerTy());
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
@@ -2840,8 +2691,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
- case ARMISD::CMOV:
- return SelectCMOVOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -3123,7 +2972,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_ldrexd: {
SDValue MemAddr = N->getOperand(2);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Chain = N->getOperand(0);
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
@@ -3181,7 +3030,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_strexd: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue Val0 = N->getOperand(2);
SDValue Val1 = N->getOperand(3);
@@ -3385,7 +3234,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VTBL1: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 6> Ops;
@@ -3396,7 +3245,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
// Form a REG_SEQUENCE to force register allocation.
@@ -3415,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
- case ARMISD::ATOMOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMOR6432);
- case ARMISD::ATOMXOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMXOR6432);
- case ARMISD::ATOMADD64_DAG:
- return SelectAtomic64(N, ARM::ATOMADD6432);
- case ARMISD::ATOMSUB64_DAG:
- return SelectAtomic64(N, ARM::ATOMSUB6432);
- case ARMISD::ATOMNAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMNAND6432);
- case ARMISD::ATOMAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMAND6432);
- case ARMISD::ATOMSWAP64_DAG:
- return SelectAtomic64(N, ARM::ATOMSWAP6432);
- case ARMISD::ATOMCMPXCHG64_DAG:
- return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
-
- case ARMISD::ATOMMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMMIN6432);
- case ARMISD::ATOMUMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMIN6432);
- case ARMISD::ATOMMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMMAX6432);
- case ARMISD::ATOMUMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMAX6432);
+ case ISD::ATOMIC_LOAD:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_STORE:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_ADD_I8,
+ ARM::ATOMIC_LOAD_ADD_I16,
+ ARM::ATOMIC_LOAD_ADD_I32,
+ ARM::ATOMIC_LOAD_ADD_I64);
+ case ISD::ATOMIC_LOAD_SUB:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_SUB_I8,
+ ARM::ATOMIC_LOAD_SUB_I16,
+ ARM::ATOMIC_LOAD_SUB_I32,
+ ARM::ATOMIC_LOAD_SUB_I64);
+ case ISD::ATOMIC_LOAD_AND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_AND_I8,
+ ARM::ATOMIC_LOAD_AND_I16,
+ ARM::ATOMIC_LOAD_AND_I32,
+ ARM::ATOMIC_LOAD_AND_I64);
+ case ISD::ATOMIC_LOAD_OR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_OR_I8,
+ ARM::ATOMIC_LOAD_OR_I16,
+ ARM::ATOMIC_LOAD_OR_I32,
+ ARM::ATOMIC_LOAD_OR_I64);
+ case ISD::ATOMIC_LOAD_XOR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_XOR_I8,
+ ARM::ATOMIC_LOAD_XOR_I16,
+ ARM::ATOMIC_LOAD_XOR_I32,
+ ARM::ATOMIC_LOAD_XOR_I64);
+ case ISD::ATOMIC_LOAD_NAND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_NAND_I8,
+ ARM::ATOMIC_LOAD_NAND_I16,
+ ARM::ATOMIC_LOAD_NAND_I32,
+ ARM::ATOMIC_LOAD_NAND_I64);
+ case ISD::ATOMIC_LOAD_MIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MIN_I8,
+ ARM::ATOMIC_LOAD_MIN_I16,
+ ARM::ATOMIC_LOAD_MIN_I32,
+ ARM::ATOMIC_LOAD_MIN_I64);
+ case ISD::ATOMIC_LOAD_MAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MAX_I8,
+ ARM::ATOMIC_LOAD_MAX_I16,
+ ARM::ATOMIC_LOAD_MAX_I32,
+ ARM::ATOMIC_LOAD_MAX_I64);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMIN_I8,
+ ARM::ATOMIC_LOAD_UMIN_I16,
+ ARM::ATOMIC_LOAD_UMIN_I32,
+ ARM::ATOMIC_LOAD_UMIN_I64);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMAX_I8,
+ ARM::ATOMIC_LOAD_UMAX_I16,
+ ARM::ATOMIC_LOAD_UMAX_I32,
+ ARM::ATOMIC_LOAD_UMAX_I64);
+ case ISD::ATOMIC_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_SWAP_I8,
+ ARM::ATOMIC_SWAP_I16,
+ ARM::ATOMIC_SWAP_I32,
+ ARM::ATOMIC_SWAP_I64);
+ case ISD::ATOMIC_CMP_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_CMP_SWAP_I8,
+ ARM::ATOMIC_CMP_SWAP_I16,
+ ARM::ATOMIC_CMP_SWAP_I32,
+ ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
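
For context, a hedged user-level example that should exercise the new 64-bit path above on a 32-bit ARM target (the exact lowering depends on subtarget features):

#include <atomic>
std::atomic<long long> Counter;          // 64-bit atomic on a 32-bit target

void bump() {
  // Becomes an ATOMIC_LOAD_ADD node with MemoryVT == i64, i.e. the
  // ARM::ATOMIC_LOAD_ADD_I64 pseudo selected by the switch above.
  Counter.fetch_add(1, std::memory_order_seq_cst);
}
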
@@ -3451,24 +3359,20 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
bool Changed = false;
unsigned NumOps = N->getNumOperands();
- ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(
- N->getOperand(InlineAsm::Op_AsmString));
- StringRef AsmString = StringRef(S->getSymbol());
-
  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
// (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
// respectively. Since there is no constraint to explicitly specify a
- // reg pair, we search %H operand inside the asm string. If it is found, the
- // transformation below enforces a GPRPair reg class for "%r" for 64-bit data.
- if (AsmString.find(":H}") == StringRef::npos)
- return NULL;
+ // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
+ // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
+ // them into a GPRPair.
- DebugLoc dl = N->getDebugLoc();
- SDValue Glue = N->getOperand(NumOps-1);
+ SDLoc dl(N);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0);
+ SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
- for(unsigned i = 0; i < NumOps -1; ++i) {
+ for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
SDValue op = N->getOperand(i);
AsmNodeOperands.push_back(op);
@@ -3482,17 +3386,38 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
else
continue;
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
&& Kind != InlineAsm::Kind_RegDefEarlyClobber)
continue;
- unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag);
unsigned RC;
bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
- if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2)
+ if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
+ || NumRegs != 2)
continue;
- assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm");
+ assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
SDValue V0 = N->getOperand(i+1);
SDValue V1 = N->getOperand(i+2);
unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
@@ -3553,8 +3478,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
Changed = true;
if(PairedReg.getNode()) {
+ OpChanged[OpChanged.size() - 1] = true;
Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
- Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
Flag, MVT::i32);
@@ -3565,11 +3494,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
}
}
- AsmNodeOperands.push_back(Glue);
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
if (!Changed)
return NULL;
- SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
AsmNodeOperands.size());
New->setNodeId(-1);
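
A hedged example of the source-level inline asm this rewrite targets (illustrative only; on ARM, %H0 names the high register of a 64-bit "r"-constrained operand, which is exactly what SelectInlineAsm now repacks into a GPRPair):

// ldrexd requires an even/even+1 register pair in ARM mode.
unsigned long long load_exclusive(void *addr) {
  unsigned long long val;
  asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(val) : "r"(addr));
  return val;
}
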
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e49cfc49854a..76a0a831f695 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -74,7 +75,7 @@ namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
LLVMContext &C, ParmContext PC)
: CCState(CC, isVarArg, MF, TM, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
@@ -174,9 +175,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetIOS()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -421,7 +423,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ if (Subtarget->getTargetTriple().isiOS() &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
@@ -452,6 +454,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -564,16 +567,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -681,6 +674,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -691,10 +686,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
+
+ // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ // Register based DivRem for AEABI (RTABI 4.2)
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+ setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ } else {
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ }
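
The AEABI helpers named above return quotient and remainder from a single call (in r0 and r1 respectively, per the RTABI), which is what makes SDIVREM/UDIVREM worth custom-lowering. A hedged C++ sketch of that contract, illustrative only since the runtime provides it in assembly:

struct IDivModResult { int Quot; int Rem; };   // returned in {r0, r1}
static IDivModResult aeabi_idivmod_semantics(int N, int D) {
  return { N / D, N % D };   // one libcall yields both values
}
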
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@@ -715,8 +736,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->isTargetDarwin()) {
// Non-Darwin platforms may return values in these registers via the
// personality function.
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setExceptionPointerRegister(ARM::R0);
setExceptionSelectorRegister(ARM::R1);
}
@@ -724,12 +743,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
@@ -742,11 +759,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
@@ -843,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ // For iOS, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
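
A hedged example of the code shape this targets: on iOS, both results may now come from a single __sincos_stret call instead of separate sin and cos libcalls:

#include <cmath>
// Both calls can feed one ISD::FSINCOS node, custom-lowered on iOS.
void polar(double A, double &S, double &C) {
  S = std::sin(A);
  C = std::cos(A);
}
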
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
@@ -882,6 +920,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ bool isThumb2, unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+ {ARM::LDREXH, ARM::t2LDREXH},
+ {ARM::LDREX, ARM::t2LDREX},
+ {ARM::LDREXD, ARM::t2LDREXD}};
+ static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+ {ARM::LDAEXH, ARM::t2LDAEXH},
+ {ARM::LDAEX, ARM::t2LDAEX},
+ {ARM::LDAEXD, ARM::t2LDAEXD}};
+ static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+ {ARM::STREXH, ARM::t2STREXH},
+ {ARM::STREX, ARM::t2STREX},
+ {ARM::STREXD, ARM::t2STREXD}};
+ static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+ {ARM::STLEXH, ARM::t2STLEXH},
+ {ARM::STLEX, ARM::t2STLEX},
+ {ARM::STLEXD, ARM::t2STLEXD}};
+
+ const unsigned (*LoadOps)[2], (*StoreOps)[2];
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+ StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
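
A worked example of the table lookup above, assuming a 32-bit acquire operation on Thumb2:

// Acquire selects the load-acquire table but the plain store table;
// Log2_32(4) == 2 indexes the 32-bit row, [1] the Thumb2 column.
unsigned Ldr, Str;
getExclusiveOperation(/*Size=*/4, Acquire, /*isThumb2=*/true, Ldr, Str);
// Ldr == ARM::t2LDAEX, Str == ARM::t2STREX
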
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -944,6 +1020,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
@@ -983,7 +1060,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
- case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
@@ -1042,6 +1118,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
+ case ARMISD::VMINNM: return "ARMISD::VMINNM";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
@@ -1069,7 +1147,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) return getPointerTy();
return VT.changeVectorElementTypeToInteger();
}
@@ -1233,7 +1311,7 @@ SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const {
@@ -1314,7 +1392,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
@@ -1325,12 +1403,12 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
false, false, 0);
}
-void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
- SmallVector<SDValue, 8> &MemOpChains,
+ SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
@@ -1357,10 +1435,10 @@ SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
@@ -1406,7 +1484,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1496,7 +1575,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1705,17 +1785,26 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
- if (isThisReturn)
- // For 'this' returns, use the R0-preserving mask
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- else
- Mask = ARI->getCallPreservedMask(CallConv);
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -1729,7 +1818,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -1795,7 +1884,7 @@ ARMTargetLowering::HandleByVal(
// else the parameter would be split between registers and stack,
// end register would be r4 in this case.
unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
// Note, first register is allocated in the beginning of function already,
// allocate remained amount of registers we need.
@@ -1886,6 +1975,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isVarArg && !Outs.empty())
return false;
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
@@ -2014,12 +2109,45 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
isVarArg));
}
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+
+ StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+
+ // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
+ // version of the "preferred return address". These offsets affect the return
+ // instruction if this is a return from PL1 without hypervisor extensions.
+ // IRQ/FIQ: +4 "subs pc, lr, #4"
+ // SWI: 0 "subs pc, lr, #0"
+ // ABORT: +4 "subs pc, lr, #4"
+ // UNDEF: +4/+2 "subs pc, lr, #0"
+ // UNDEF varies depending on whether the exception came from ARM or Thumb
+ // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
+
+ int64_t LROffset;
+ if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
+ IntKind == "ABORT")
+ LROffset = 4;
+ else if (IntKind == "SWI" || IntKind == "UNDEF")
+ LROffset = 0;
+ else
+ report_fatal_error("Unsupported interrupt attribute. If present, value "
+ "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+ RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
+
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
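
A hedged source-level example of what reaches this path (Clang/GCC attribute syntax assumed):

// Marks an IRQ handler; LowerInterruptReturn then emits the equivalent of
// "subs pc, lr, #4" for the epilogue (offset 4 per the table above).
__attribute__((interrupt("IRQ"))) void irq_handler(void) {
  // ... acknowledge and dispatch ...
}
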
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ SDLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2099,6 +2227,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
@@ -2147,7 +2288,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Chain = Copy->getOperand(0);
+ TCChain = Copy->getOperand(0);
} else {
return false;
}
@@ -2155,7 +2296,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG)
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
@@ -2186,7 +2328,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
if (CP->isMachineConstantPoolEntry())
@@ -2207,7 +2349,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2236,7 +2378,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2279,7 +2421,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy();
@@ -2349,7 +2491,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
@@ -2392,7 +2534,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2457,7 +2599,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
@@ -2473,7 +2615,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
@@ -2482,7 +2624,7 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}
@@ -2491,7 +2633,7 @@ SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::arm_thread_pointer: {
@@ -2527,7 +2669,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
- return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
}
@@ -2536,19 +2678,33 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(ARM_MB::ISH, MVT::i32));
+ ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
+ unsigned Domain = ARM_MB::ISH;
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
+ // Swift happens to implement ISHST barriers in a way that's compatible with
+ // Release semantics but weaker than ISH so we'd be fools not to use
+ // it. Beware: other processors probably don't!
+ Domain = ARM_MB::ISHST;
+ }
+
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
+ DAG.getConstant(Domain, MVT::i32));
}
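
A hedged mirror of the barrier-domain choice above, pulled out as a standalone helper for clarity:

static unsigned fenceDomain(bool IsMClass, bool IsSwift, AtomicOrdering Ord) {
  if (IsMClass) return ARM_MB::SY;              // only full-system on M-class
  if (IsSwift && Ord == Release) return ARM_MB::ISHST;  // store-only suffices
  return ARM_MB::ISH;                           // inner-shareable default
}
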
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
@@ -2559,7 +2715,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
// Just preserve the chain.
return Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
@@ -2584,7 +2740,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -2595,7 +2751,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2630,6 +2786,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
unsigned InRegsParamRecordIdx,
+ unsigned ArgSize,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize)
const {
@@ -2648,7 +2805,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
- ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
+
+ // If parameter is split between stack and GPRs...
+ if (NumGPRs && Align == 8 &&
+ (ArgRegsSize < ArgSize ||
+ InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
+ // Add padding for the part of the parameter recovered from GPRs, so
+ // that its last byte falls at an address of the form K*8 - 1.
+ // We need this because the remaining (stack) part of the parameter is
+ // stack-aligned, and the "GPRs head" must attach to it without gaps:
+ // Stack:
+ // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
+ // [ [padding] [GPRs head] ] [ Tail passed via stack ....
+ //
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Padding =
+ ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
+ (ArgRegsSize + AFI->getArgRegsSaveSize());
+ ArgRegsSaveSize = ArgRegsSize + Padding;
+ } else
+ // We don't need to extend regs save size for byval parameters if they
+ // are passed via GPRs only.
+ ArgRegsSaveSize = ArgRegsSize;
}
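
A worked example of the padding formula, with assumed values (8-byte stack alignment, a byval head of three GPRs, nothing saved yet):

unsigned ArgRegsSize = 3 * 4, Saved = 0, Align = 8;
unsigned Padding = ((ArgRegsSize + Saved + Align - 1) & ~(Align - 1)) -
                   (ArgRegsSize + Saved);          // 16 - 12 == 4
unsigned ArgRegsSaveSize = ArgRegsSize + Padding;  // == 16
// The 4 padding bytes let the GPR head end on an 8-byte boundary, so the
// stack tail of the parameter attaches without a gap.
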
// The remaining GPRs hold either the beginning of variable-argument
@@ -2661,11 +2840,12 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
// Return: The frame index registers were stored into.
int
ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
+ unsigned ArgSize,
bool ForceMutable) const {
// Currently, two use cases are possible:
@@ -2690,12 +2870,13 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
lastRegToSaveIndex = REnd - ARM::R0;
} else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
- (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ (GPRArgRegs, array_lengthof(GPRArgRegs));
lastRegToSaveIndex = 4;
}
unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
+ ArgRegsSize, ArgRegsSaveSize);
// Store any by-val regs to their spots on the stack so that they may be
// loaded by dereferencing the result of the formal parameter pointer or va_next.
@@ -2703,9 +2884,17 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
// was initialized, it can't be initialized again.
if (ArgRegsSaveSize) {
+ unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
+
+ if (Padding) {
+ assert(AFI->getStoredByValParamsPadding() == 0 &&
+ "The only parameter may be padded.");
+ AFI->setStoredByValParamsPadding(Padding);
+ }
+
int FrameIndex = MFI->CreateFixedObject(
ArgRegsSaveSize,
- ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+ Padding + ArgOffset,
false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
@@ -2737,13 +2926,14 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
return FrameIndex;
} else
// This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+ return MFI->CreateFixedObject(
+ 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
}
// Setup stack frame, the va_list pointer will start from.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2756,7 +2946,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// argument passed via stack.
int FrameIndex =
StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, ForceMutable);
+ 0, ArgOffset, 0, ForceMutable);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -2766,7 +2956,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2896,12 +3086,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CurByValIndex,
Ins[VA.getValNo()].PartOffset,
VA.getLocMemOffset(),
+ Flags.getByValSize(),
true /*force mutable frames*/);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
+ unsigned FIOffset = VA.getLocMemOffset() +
+ AFI->getStoredByValParamsPadding();
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- VA.getLocMemOffset(), true);
+ FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -2943,7 +3136,7 @@ static bool isFloatingPointZero(SDValue Op) {
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
@@ -3001,7 +3194,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -3015,7 +3208,7 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
unsigned Opc = Cmp.getOpcode();
- DebugLoc DL = Cmp.getDebugLoc();
+ SDLoc DL(Cmp);
if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
@@ -3035,7 +3228,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Convert:
//
@@ -3083,6 +3276,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+ if (CC == ISD::SETNE)
+ return ISD::SETEQ;
+ return ISD::getSetCCSwappedOperands(CC);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
+
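
A worked trace of the routine above for ISD::SETULT ("unordered or less-than"):

// Step 1: 'greater-for-inequality' group -> CondCode = GT
// Step 2: 'less' group                   -> swpCmpOps = true
// Step 3: unordered group                -> swpCmpOps flips back to false,
//                                           swpVselOps = true, GT becomes GE
ARMCC::CondCodes CondCode = ARMCC::AL;
bool swpCmpOps = false, swpVselOps = false;
checkVSELConstraints(ISD::SETULT, CondCode, swpCmpOps, swpVselOps);
// Result: CondCode == ARMCC::GE, swpCmpOps == false, swpVselOps == true
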
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3090,18 +3338,69 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't)
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // same operands, as follows:
+ // c = fcmp [ogt, olt, ugt, ult] a, b
+ // select c, a, b
+ // We only do this in unsafe-fp-math, because signed zeros and NaNs are
+ // handled differently than the original code sequence.
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
+ RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
+
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
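
A hedged example of the pattern the VMAXNM/VMINNM match above fires on (only under unsafe-fp-math, since NaN and signed-zero behaviour differ):

// fcmp ogt %a, %b ; select -> VMAXNM.f32 on an FPARMv8 subtarget
float fast_max(float A, float B) {
  return A > B ? A : B;   // compare and select share the same operands
}
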
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
@@ -3145,7 +3444,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(0, MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ return DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
@@ -3163,7 +3462,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3171,9 +3470,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
- SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
PtrType, Ptr, DAG.getConstant(4, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3193,7 +3492,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
bool LHSSeenZero = false;
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
@@ -3243,7 +3542,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
@@ -3284,7 +3583,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PTy = getPointerTy();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
@@ -3320,7 +3619,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getValueType().getVectorElementType() == MVT::i32) {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
@@ -3342,7 +3641,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
@@ -3360,7 +3659,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
if (VT.getVectorElementType() == MVT::f32)
@@ -3396,7 +3695,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
@@ -3417,7 +3716,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
@@ -3501,7 +3800,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MFI->setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -3521,7 +3820,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
? ARM::R7 : ARM::R11;
@@ -3533,47 +3832,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
-/// and size(DestVec) > 128-bits.
-/// This is achieved by doing the one extension from the SrcVec, splitting the
-/// result, extending these parts, and then concatenating these into the
-/// destination.
-static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
- EVT DestVT = N->getValueType(0);
-
- assert(DestVT.getSizeInBits() > 128 &&
- "Custom sext/zext expansion needs >128-bit vector.");
- // If this is a normal length extension, use the default expansion.
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
- SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
- return SDValue();
-
- DebugLoc dl = N->getDebugLoc();
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
- unsigned NumElts = SrcVT.getVectorNumElements();
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
-
- EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts);
- EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts/2);
- EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
- NumElts/2);
-
- Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
- SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(0));
- SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(NumElts/2));
- ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
- ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
-}
-
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -3581,7 +3839,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Op = N->getOperand(0);
// This function is only supposed to be called for i64 types, either as the
@@ -3618,7 +3876,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
@@ -3634,7 +3892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
@@ -3670,7 +3928,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
@@ -3703,7 +3961,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
DAG.getConstant(Intrinsic::arm_get_fpscr,
MVT::i32));
@@ -3718,7 +3976,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!ST->hasV6T2Ops())
return SDValue();
@@ -3742,7 +4000,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
@@ -3764,7 +4022,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
/// v4i16:Extracted = [k0 k1 k2 k3 ]
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
if (VT.is64BitVector()) {
@@ -3799,7 +4057,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
@@ -3838,7 +4096,7 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!VT.isVector())
return SDValue();
@@ -3873,7 +4131,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
@@ -3919,7 +4177,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue CC = Op.getOperand(2);
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
@@ -4177,18 +4435,26 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
- DebugLoc DL = Op.getDebugLoc();
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already; no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
+ SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
@@ -4196,15 +4462,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+ // The rest of our options are NEON-only; make sure that's allowed before
+ // proceeding.
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -4212,11 +4494,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
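
Both NEON paths above splat one 32-bit immediate across a D register, which is why the halves-match guard earlier is needed for f64: only doubles whose two 32-bit halves coincide (0.0 most importantly) can be produced this way. A standalone sketch of that representability test:

    #include <cstdint>
    #include <cstring>

    static bool isI32SplatRepresentable(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits)); // bitcastToAPInt, scalar edition
      return (Bits & 0xffffffffu) == (Bits >> 32);
    }
    // 0.0 passes (both halves zero); 1.0 is 0x3FF0000000000000 and fails.
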
@@ -4475,7 +4762,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST, DebugLoc dl) {
+ const ARMSubtarget *ST, SDLoc dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
@@ -4496,7 +4783,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
@@ -4580,7 +4867,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
- if (isOnlyLowElement)
+ // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
+ // Keep going if we hit this case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
@@ -4679,6 +4968,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that would be
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target); for everything else it would be
+ // materialization element by element on the stack followed by a load.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
+
return SDValue();
}
@@ -4686,7 +4993,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4875,7 +5182,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
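
For reference, the three extractions above assume a 32-bit table entry packing an operation selector in bits [29:26] and two 13-bit shuffle IDs below it; a small decoder makes the layout explicit:

    struct PFDecoded {
      unsigned OpNum, LHSID, RHSID;
    };

    static PFDecoded decodePFEntry(unsigned PFEntry) {
      PFDecoded D;
      D.OpNum = (PFEntry >> 26) & 0x0Fu;            // which shuffle operation
      D.LHSID = (PFEntry >> 13) & ((1u << 13) - 1); // shuffle ID of LHS input
      D.RHSID = (PFEntry >> 0)  & ((1u << 13) - 1); // shuffle ID of RHS input
      return D;
    }
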
@@ -4955,7 +5262,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
@@ -4974,7 +5281,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
@@ -4992,7 +5299,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -5156,7 +5463,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue Vec = Op.getOperand(0);
if (Op.getValueType() == MVT::i32 &&
Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
@@ -5168,7 +5475,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
"unexpected CONCAT_VECTORS");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -5291,7 +5598,7 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
- return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
/// SkipLoadExtensionForVMULL - return a load of the original vector size that
@@ -5304,7 +5611,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
- return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
LD->getAlignment());
@@ -5312,7 +5619,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// We need to create a zextload/sextload. We cannot just create a load
// followed by a zext/zext node because LowerMUL is also run during normal
// operation legalization where we can't create illegal types.
- return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy,
+ return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
@@ -5341,7 +5648,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
@@ -5358,7 +5665,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
@@ -5430,7 +5737,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
}
// Legalize to a VMULL instruction.
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
@@ -5460,7 +5767,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
}
static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -5489,7 +5796,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
}
static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -5530,7 +5837,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
@@ -5565,7 +5872,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
@@ -5649,12 +5956,76 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
}
if (!ExtraOp)
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+ // For iOS, we want to call an alternative entry point: __sincos_stret;
+ // the return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+ // Create stack object for sret.
+ const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+ const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
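
At the source level, the lowering above corresponds roughly to the following sketch (the struct layout mirrors the two loads at offsets 0 and sizeof(float); whether the pair really travels via sret or in registers is an ABI detail of the Darwin runtime symbol):

    struct SinCosF {
      float Sin, Cos;
    };

    extern "C" SinCosF __sincosf_stret(float); // Darwin-only entry point

    static void sincosExample(float X, float &S, float &C) {
      SinCosF R = __sincosf_stret(X); // one call computes both results
      S = R.Sin;                      // load from offset 0
      C = R.Cos;                      // load from offset sizeof(float)
    }
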
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -5665,40 +6036,73 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
- DebugLoc dl = Node->getDebugLoc();
+ SelectionDAG &DAG) {
+ SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+ for (unsigned i = 2; i < Node->getNumOperands(); ++i) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
}
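
The EXTRACT_ELEMENT/BUILD_PAIR traffic above is the usual 64-bit split and rejoin; in scalar terms (a sketch for orientation only):

    #include <cstdint>

    static void splitI64(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(V);       // EXTRACT_ELEMENT ..., 0
      Hi = static_cast<uint32_t>(V >> 32); // EXTRACT_ELEMENT ..., 1
    }

    static uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
      return (static_cast<uint64_t>(Hi) << 32) | Lo; // BUILD_PAIR Lo, Hi
    }
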
+static void ReplaceREADCYCLECOUNTER(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ SDLoc DL(N);
+ SDValue Cycles32, OutChain;
+
+ if (Subtarget->hasPerfMon()) {
+ // With the Performance Monitors extension, the cycle-count is read by:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
+ DAG.getConstant(15, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(9, MVT::i32),
+ DAG.getConstant(13, MVT::i32),
+ DAG.getConstant(0, MVT::i32)
+ };
+
+ Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
+ array_lengthof(Ops));
+ OutChain = Cycles32.getValue(1);
+ } else {
+ // Intrinsic is defined to return 0 on unsupported platforms. Technically
+ // there are older ARM CPUs that have implementation-specific ways of
+ // obtaining this information (FIXME!).
+ Cycles32 = DAG.getConstant(0, MVT::i32);
+ OutChain = DAG.getEntryNode();
+ }
+
+ SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
+ Cycles32, DAG.getConstant(0, MVT::i32));
+ Results.push_back(Cycles64);
+ Results.push_back(OutChain);
+}
+
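
The coprocessor operands handed to the intrinsic above spell out the PMCCNTR read. In inline-assembly form the emitted instruction is the following (a sketch; it assumes the kernel has enabled user-mode access to the counter):

    #include <cstdint>

    static inline uint32_t readCycleCounter32() {
      uint32_t Cycles;
      // Same encoding the lowering emits: mrc p15, #0, <Rt>, c9, c13, #0
      asm volatile("mrc p15, #0, %0, c9, c13, #0" : "=r"(Cycles));
      return Cycles;
    }
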
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -5753,6 +6157,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
}
@@ -5768,49 +6175,28 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
- case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ case ISD::READCYCLECOUNTER:
+ ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
@@ -5830,6 +6216,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -5845,21 +6232,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
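
The blocks built below form the classic load-exclusive/store-exclusive retry loop. A C++ sketch of the 32-bit case (ARM state, ARMv6+; the barriers needed for stronger orderings are elided, which is part of what threading Ord through getExclusiveOperation is about):

    #include <cstdint>

    static bool cmpxchg32(uint32_t *Ptr, uint32_t Expected, uint32_t New) {
      uint32_t Observed, Failed;
      asm volatile("1: ldrex %0, [%2]     \n" // load-exclusive current value
                   "   cmp   %0, %3       \n" // equal to the expected value?
                   "   movne %1, #1       \n"
                   "   bne   2f           \n" // no: give up
                   "   strex %1, %4, [%2] \n" // yes: try storing the new value
                   "   cmp   %1, #0       \n"
                   "   bne   1b           \n" // lost the reservation: retry
                   "2:"
                   : "=&r"(Observed), "=&r"(Failed)
                   : "r"(Ptr), "r"(Expected), "r"(New)
                   : "cc", "memory");
      return Observed == Expected;
    }
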
@@ -5939,6 +6312,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -5946,24 +6320,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -6047,6 +6408,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6054,24 +6416,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
@@ -6115,7 +6473,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -6153,7 +6514,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6161,11 +6522,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction::iterator It = BB;
++It;
+ bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
+ unsigned offset = (isStore ? -2 : 0);
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
+ unsigned ptr = MI->getOperand(offset+2).getReg();
+ unsigned vallo = MI->getOperand(offset+3).getReg();
+ unsigned valhi = MI->getOperand(offset+4).getReg();
+ unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6174,8 +6539,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6215,21 +6585,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// fallthrough --> exitMBB
BB = loopMBB;
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ if (!isStore) {
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
}
unsigned StoreLo, StoreHi;
@@ -6281,7 +6653,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// Store
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
@@ -6299,7 +6673,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
@@ -6320,6 +6694,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
+
+ if (isThumb2) {
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -6851,8 +7270,109 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+/// Return the load opcode for a given load size. If the load size is >= 8,
+/// a NEON opcode is returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If the store size is >= 8,
+/// a NEON opcode is returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation of the given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation of the given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
@@ -6867,23 +7387,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
@@ -6891,27 +7406,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ // Select the correct opcode and register class for unit size load/store
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -6922,34 +7437,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -6957,30 +7451,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
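
In plain terms, the unrolled path just emitted copies LoopSize bytes in UnitSize chunks and then mops up BytesLeft one byte at a time; a scalar model of the computation (not the emitted code):

    #include <cstring>

    static void structByvalModel(char *Dst, const char *Src, unsigned SizeVal,
                                 unsigned UnitSize) {
      unsigned BytesLeft = SizeVal % UnitSize;
      unsigned LoopSize = SizeVal - BytesLeft;
      for (unsigned I = 0; I != LoopSize; I += UnitSize)
        std::memcpy(Dst + I, Src + I, UnitSize); // one post-inc ld/st pair
      for (unsigned I = LoopSize; I != SizeVal; ++I)
        Dst[I] = Src[I];                         // LDRB_POST/STRB_POST tail
    }
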
@@ -7021,17 +7499,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
+ .addReg(Vtmp).addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7043,10 +7520,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
}
BB->addSuccessor(loopMBB);
@@ -7075,39 +7554,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
@@ -7116,34 +7586,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7293,46 +7748,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_STORE_I64:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
@@ -7710,13 +8168,13 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
// Slct is now known to be the desired identity constant when CC is true.
SDValue TrueVal = OtherOp;
- SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
OtherOp, NonConstantVal);
// Unless SwapSelectOps says CC should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
CCOp, TrueVal, FalseVal);
}
@@ -7823,9 +8281,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
widenType, &Ops[0], Ops.size());
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
@@ -7868,8 +8326,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
assert(AddcNode->getNumValues() == 2 &&
AddcNode->getValueType(0) == MVT::i32 &&
- AddcNode->getValueType(1) == MVT::Glue &&
- "Expect ADDC with two result values: i32, glue");
+ "Expect ADDC with two result values. First: i32");
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
// Check that the ADDC adds the low result of the S/UMUL_LOHI.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
@@ -7950,7 +8411,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
Ops.push_back(*LowAdd);
Ops.push_back(*HiAdd);
- SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32),
&Ops[0], Ops.size());
@@ -8038,6 +8499,13 @@ static SDValue PerformSUBCombine(SDNode *N,
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
+/// However, for (A + B) * (A + B),
+/// vadd d2, d0, d1
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is slower than
+/// vadd d2, d0, d1
+/// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -8057,8 +8525,11 @@ static SDValue PerformVMULCombine(SDNode *N,
std::swap(N0, N1);
}
+ if (N0 == N1)
+ return SDValue();
+
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
return DAG.getNode(Opcode, DL, VT,
@@ -8088,11 +8559,11 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
int64_t MulAmt = C->getSExtValue();
- unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue Res;
MulAmt >>= ShiftAmt;
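
The two lines above peel the power-of-two factor off the multiplier so the remaining odd factor can be matched against add/sub-of-shift patterns, relying on x * MulAmt == (x * (MulAmt >> ShiftAmt)) << ShiftAmt. For example, MulAmt = 24 gives ShiftAmt = 3 and odd part 3, so x * 24 becomes ((x << 1) + x) << 3. A tiny check of the identity (a sketch; assumes MulAmt != 0 and ignores signed overflow):

    #include <cstdint>

    static bool mulDecompHolds(int64_t X, int64_t MulAmt) {
      unsigned ShiftAmt = __builtin_ctzll(MulAmt) & 31u; // countTrailingZeros
      return ((X * (MulAmt >> ShiftAmt)) << ShiftAmt) == X * MulAmt;
    }
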
@@ -8156,7 +8627,7 @@ static SDValue PerformANDCombine(SDNode *N,
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
@@ -8199,7 +8670,7 @@ static SDValue PerformORCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
@@ -8248,22 +8719,29 @@ static SDValue PerformORCombine(SDNode *N,
unsigned SplatBitSize;
bool HasAnyUndefs;
+ APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
- APInt SplatBits0;
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ // Ensure that the second operands of both ANDs are constants.
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs) {
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
- APInt SplatBits1;
- if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs &&
- SplatBits0 == ~SplatBits1) {
- // Canonicalize the vector type to make instruction selection simpler.
- EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
- N0->getOperand(1), N0->getOperand(0),
- N1->getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
- }
+ HasAnyUndefs) && !HasAnyUndefs) {
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ // Ensure that the bit widths of the constants are the same and that
+ // the splat arguments are logical inverses as per the pattern we
+ // are trying to simplify.
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection
+ // simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1),
+ N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
}
}
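
The rewrite above rests on a bitwise identity: when the two AND masks are complementary, (A & C) | (B & ~C) selects bits of A where C is set and bits of B elsewhere, which is exactly what VBSL computes per lane. In scalar form:

    #include <cstdint>

    static uint32_t bitSelect(uint32_t Mask, uint32_t A, uint32_t B) {
      return (A & Mask) | (B & ~Mask); // VBSL, one 32-bit lane
    }
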
@@ -8274,7 +8752,7 @@ static SDValue PerformORCombine(SDNode *N,
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
//
@@ -8309,7 +8787,7 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= CountTrailingZeros_32(~Mask);
+ Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, MVT::i32),
@@ -8336,7 +8814,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned amt = CountTrailingZeros_32(Mask2);
+ unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
@@ -8352,7 +8830,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
- unsigned lsb = CountTrailingZeros_32(Mask);
+ unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
@@ -8370,7 +8848,7 @@ static SDValue PerformORCombine(SDNode *N,
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(Mask);
+ unsigned LSB = countTrailingZeros(Mask);
if (ShAmtC != LSB)
return SDValue();
@@ -8413,12 +8891,12 @@ static SDValue PerformBFICombine(SDNode *N,
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(~InvMask);
- unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned LSB = countTrailingZeros(~InvMask);
+ unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
- return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
}
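
The LSB/Width arithmetic above recovers the inserted field from the inverted mask: for InvMask = 0xFF00FFFF, ~InvMask = 0x00FF0000, giving LSB = 16 and Width = 8. A standalone sketch (GCC/Clang builtins; assumes ~InvMask != 0):

    #include <cstdint>

    static void bfiField(uint32_t InvMask, unsigned &LSB, unsigned &Width) {
      uint32_t Field = ~InvMask;                 // bits covered by the insert
      LSB = __builtin_ctz(Field);                // countTrailingZeros
      Width = (32 - __builtin_clz(Field)) - LSB; // countLeadingZeros
    }
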
@@ -8444,7 +8922,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
LoadSDNode *LD = cast<LoadSDNode>(InNode);
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = LD->getDebugLoc();
+ SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
@@ -8481,7 +8959,7 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
if (Op0.getOpcode() == ARMISD::VMOVRRD &&
Op0.getNode() == Op1.getNode() &&
Op0.getResNo() == 0 && Op1.getResNo() == 1)
- return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Op0.getOperand(0));
return SDValue();
}
@@ -8523,7 +9001,7 @@ static SDValue PerformSTORECombine(SDNode *N,
NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
@@ -8584,7 +9062,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
StVal.getNode()->getOperand(0), BasePtr,
@@ -8606,14 +9084,14 @@ static SDValue PerformSTORECombine(SDNode *N,
// Bitcast an i64 store extracted from a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = StVal.getDebugLoc();
+ SDLoc dl(StVal);
SDValue IntVec = StVal.getOperand(0);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
IntVec.getValueType().getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
Vec, StVal.getOperand(1));
- dl = N->getDebugLoc();
+ dl = SDLoc(N);
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
@@ -8659,7 +9137,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
EVT VT = N->getValueType(0);
if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
return SDValue();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
@@ -8673,6 +9151,98 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
+/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
+static SDValue
+PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
+ // At that time, we may have inserted bitcasts from integer to float.
+ // If these bitcasts have survived DAGCombine, change the lowering of this
+ // BUILD_VECTOR into something more vector friendly, i.e., something that
+ // does not force the use of floating point types.
+
+ // Make sure we can change the type of the vector.
+ // This is possible iff:
+ // 1. The vector is only used in a bitcast to an integer type. I.e.,
+ // 1.1. Vector is used only once.
+ // 1.2. Use is a bit convert to an integer type.
+ // 2. The size of its operands is 32 bits (64 bits are not legal).
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+
+ // Check 1.1. and 2.
+ if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
+ return SDValue();
+
+ // By construction, the input type must be float.
+ assert(EltVT == MVT::f32 && "Unexpected type!");
+
+ // Check 1.2.
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST ||
+ Use->getValueType(0).isFloatingPoint())
+ return SDValue();
+
+ // Check profitability.
+ // The model: if more than half of the relevant operands are bitcast from
+ // i32, turn the build_vector into a sequence of insert_vector_elt.
+ // Relevant operands are those that are not statically
+ // (i.e., at compile time) bitcast.
+ unsigned NumOfBitCastedElts = 0;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumOfRelevantElts = NumElts;
+ for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
+ SDValue Elt = N->getOperand(Idx);
+ if (Elt->getOpcode() == ISD::BITCAST) {
+ // Assume only bit cast to i32 will go away.
+ if (Elt->getOperand(0).getValueType() == MVT::i32)
+ ++NumOfBitCastedElts;
+ } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+ // Constants are statically casted, thus do not count them as
+ // relevant operands.
+ --NumOfRelevantElts;
+ }
+
+ // Check if more than half of the elements require a non-free bitcast.
+ if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ // Create the new vector type.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ // Check if the type is legal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VecVT))
+ return SDValue();
+
+ // Combine:
+ // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
+ // => BITCAST INSERT_VECTOR_ELT
+ // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
+ // (BITCAST EN), N.
+ SDValue Vec = DAG.getUNDEF(VecVT);
+ SDLoc dl(N);
+ for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
+ SDValue V = N->getOperand(Idx);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (V.getOpcode() == ISD::BITCAST &&
+ V->getOperand(0).getValueType() == MVT::i32)
+ // Fold obvious case.
+ V = V.getOperand(0);
+ else {
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(V.getNode());
+ }
+ SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
+ }
+ Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ return Vec;
+}
+
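A minimal sketch of the profitability rule implemented above (editor's example; EltKind and shouldRewriteBuildVector are invented names, the real DAG walk lives in the function itself):

#include <cassert>
#include <vector>

enum class EltKind { BitcastFromI32, ConstantOrUndef, Other };

// Editor's model of the profitability walk in the combine above.
bool shouldRewriteBuildVector(const std::vector<EltKind> &Elts) {
  unsigned NumBitCasted = 0;
  unsigned NumRelevant = static_cast<unsigned>(Elts.size());
  for (EltKind K : Elts) {
    if (K == EltKind::BitcastFromI32)
      ++NumBitCasted;            // non-free bitcast the rewrite can fold
    else if (K == EltKind::ConstantOrUndef)
      --NumRelevant;             // statically materialized; not counted
  }
  return NumBitCasted > NumRelevant / 2; // strictly more than half
}

int main() {
  using E = EltKind;
  // Two of three relevant lanes come from i32 bitcasts -> rewrite.
  assert(shouldRewriteBuildVector({E::BitcastFromI32, E::BitcastFromI32,
                                   E::Other, E::ConstantOrUndef}));
  // Only one of three relevant lanes -> keep the current lowering.
  assert(!shouldRewriteBuildVector({E::BitcastFromI32, E::Other, E::Other}));
  return 0;
}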
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
@@ -8686,7 +9256,7 @@ static SDValue PerformInsertEltCombine(SDNode *N,
return SDValue();
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
VT.getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
@@ -8732,7 +9302,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
!TLI.isTypeLegal(Concat1Op1.getValueType()))
return SDValue();
- SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
Op0.getOperand(0), Op1.getOperand(0));
// Translate the shuffle mask.
SmallVector<int, 16> NewMask;
@@ -8748,7 +9318,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
NewElt = HalfElts + MaskElt - NumElts;
NewMask.push_back(NewElt);
}
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
DAG.getUNDEF(VT), NewMask.data());
}
@@ -8865,7 +9435,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
Ops.push_back(N->getOperand(i));
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops.data(), Ops.size(),
MemInt->getMemoryVT(),
MemInt->getMemOperand());
@@ -8939,7 +9509,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
- SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
Ops, 2, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
@@ -8994,7 +9564,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
if (EltSize > VT.getVectorElementType().getSizeInBits())
return SDValue();
- return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
// isConstVecPow2 - Return true if each vector element is a power of 2, all
@@ -9051,12 +9621,27 @@ static SDValue PerformVCVTCombine(SDNode *N,
!isConstVecPow2(ConstVec, isSigned, C))
return SDValue();
+ MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ // These instructions only exist for converting from f32 to i32. We can handle
+ // smaller integers by generating an extra truncate, but larger ones would
+ // be lossy.
+ return SDValue();
+ }
+
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
- N->getValueType(0),
- DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
- DAG.getConstant(Log2_64(C), MVT::i32));
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
+ NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+
+ if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);
+
+ return FixConv;
}
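For intuition, a scalar analogue of the combine above (editor's example, assuming an exact power of two and an in-range value): converting x * 2^n from f32 to i32 matches a single fixed-point vcvt with #n fractional bits, since both round toward zero.

#include <cassert>
#include <cstdint>

// Scalar model of the VCVT combine: converting F * (1 << Bits) to integer
// equals a float->fixed conversion with 'Bits' fractional bits.
int32_t cvtToFixedPoint(float F, unsigned Bits) {
  return static_cast<int32_t>(F * static_cast<float>(1u << Bits));
}

int main() {
  // vcvt.s32.f32 with #3 fbits computes round-to-zero of f * 8 per lane.
  assert(cvtToFixedPoint(1.5f, 3) == 12);
  return 0;
}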
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
@@ -9087,12 +9672,28 @@ static SDValue PerformVDIVCombine(SDNode *N,
!isConstVecPow2(ConstVec, isSigned, C))
return SDValue();
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ // These instructions only exist for converting from i32 to f32. We can handle
+ // smaller integers by generating an extra extend, but larger ones would
+ // be lossy.
+ return SDValue();
+ }
+
+ SDValue ConvInput = Op.getOperand(0);
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ ConvInput);
+
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, MVT::i32),
- Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+ ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
}
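And the mirror image for the VDIV combine (editor's example): sint_to_fp followed by a divide by 2^n matches a fixed-to-float vcvt with #n fractional bits.

#include <cassert>
#include <cstdint>

// Scalar model of the VDIV combine: int->float followed by a divide by
// (1 << Bits) equals a fixed->float conversion with 'Bits' fractional bits.
float cvtFromFixedPoint(int32_t I, unsigned Bits) {
  return static_cast<float>(I) / static_cast<float>(1u << Bits);
}

int main() {
  // vcvt.f32.s32 with #2 fbits divides each converted lane by 4.
  assert(cvtFromFixedPoint(10, 2) == 2.5f);
  return 0;
}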
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -9273,7 +9874,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
}
@@ -9290,7 +9891,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2),
DAG.getConstant(Cnt, MVT::i32));
}
@@ -9321,7 +9922,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
DAG.MaskedValueIsZero(N0.getOperand(0),
APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
}
}
@@ -9338,7 +9939,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
- return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
break;
@@ -9347,7 +9948,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
}
}
@@ -9387,7 +9988,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
Opc = ARMISD::VGETLANEu;
break;
}
- return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
}
}
@@ -9476,7 +10077,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
if (!Opcode)
return SDValue();
- return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
@@ -9488,7 +10089,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
return SDValue();
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue FalseVal = N->getOperand(0);
@@ -9578,6 +10179,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
return CombineBaseUpdate(N, DCI);
+ case ARMISD::BUILD_VECTOR:
+ return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -9702,6 +10305,21 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ if (!isTypeLegal(EVT::getEVT(Ty1)))
+ return false;
+
+ assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
+
+ // Assuming the caller doesn't have a zeroext or signext return parameter,
+ // truncation all the way down to i1 is valid.
+ return true;
+}
+
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -10101,9 +10719,19 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+ unsigned BitWidth = KnownOne.getBitWidth();
+ KnownZero = KnownOne = APInt(BitWidth, 0);
switch (Op.getOpcode()) {
default: break;
+ case ARMISD::ADDC:
+ case ARMISD::ADDE:
+ case ARMISD::SUBC:
+ case ARMISD::SUBE:
+ // These nodes' second result is a boolean, so all but the low bit are known zero.
+ if (Op.getResNo() == 0)
+ break;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
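A tiny check of the known-bits claim above (editor's example): the boolean carry/borrow result of ADDC/ADDE/SUBC/SUBE is 0 or 1, so bits 31..1 are known zero, which is exactly what APInt::getHighBitsSet(BitWidth, BitWidth - 1) records.

#include <cassert>
#include <cstdint>

int main() {
  // getHighBitsSet(32, 31) marks bits 31..1 known zero; equivalently,
  // the boolean result is always Val & 1.
  uint32_t KnownZeroMask = ~0u << 1;     // bits 31..1
  for (uint32_t Bool : {0u, 1u})         // the only possible carry values
    assert((Bool & KnownZeroMask) == 0);
  return 0;
}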
@@ -10217,7 +10845,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
@@ -10232,6 +10860,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
@@ -10240,6 +10870,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
@@ -10426,6 +11058,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ }
+
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy());
+
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+
+ SDLoc dl(Op);
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
+ 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
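For context (editor's sketch, hedged): on AEABI targets the divrem libcalls selected above, e.g. __aeabi_idivmod for i32, return the quotient/remainder pair in adjacent registers, which is why RetTy is built as a two-field struct. A C++ model of that value-pair contract:

#include <cassert>

// Editor's model of the pairing LowerDivRem builds a libcall for; the
// real helper returns {quot, rem} in r0/r1 per the run-time ABI.
struct DivRem { int quot; int rem; };

DivRem divrem(int Num, int Den) { // stand-in for the runtime helper
  return { Num / Den, Num % Den };
}

int main() {
  DivRem R = divrem(7, 3);
  assert(R.quot == 2 && R.rem == 1);
  return 0;
}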
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10434,17 +11114,15 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
- return 0;
+ return false;
+
// There can be 1's on either or both "outsides"; all the "inside"
// bits must be 0's.
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ unsigned TO = CountTrailingOnes_32(v);
+ unsigned LO = CountLeadingOnes_32(v);
+ v = (v >> TO) << TO;
+ v = (v << LO) >> LO;
+ return v == 0;
}
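Editor's mirror of the rewritten predicate, with compiler builtins in place of CountTrailingOnes_32/CountLeadingOnes_32, plus a few sample masks: ones may hug either edge, but the interior must be all zeros.

#include <cassert>
#include <cstdint>

bool isBitFieldInvertedMask(uint32_t V) {
  if (V == 0xffffffffu)
    return false;
  unsigned TO = __builtin_ctz(~V); // count of trailing ones of V
  unsigned LO = __builtin_clz(~V); // count of leading ones of V
  V = (V >> TO) << TO;             // strip the trailing ones
  V = (V << LO) >> LO;             // strip the leading ones
  return V == 0;                   // nothing may remain inside
}

int main() {
  assert(isBitFieldInvertedMask(0xFFFF00FFu));  // single zero field inside
  assert(isBitFieldInvertedMask(0x000000FFu));  // ones only at the bottom
  assert(!isBitFieldInvertedMask(0xFF00FF00u)); // stray ones in the middle
  return 0;
}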
/// isFPImmLegal - Returns true if the target can instruction select the
@@ -10513,6 +11191,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_ldrex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_strex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
case Intrinsic::arm_strexd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 426010e295d7..90facddeb02b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -52,6 +52,7 @@ namespace llvm {
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
+ INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -94,7 +95,6 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
- MEMBARRIER, // Memory barrier (DMB)
MEMBARRIER_MCR, // Memory barrier (MCR)
PRELOAD, // Preload
@@ -186,6 +186,8 @@ namespace llvm {
// Floating-point max and min:
FMAX,
FMIN,
+ VMAXNM,
+ VMINNM,
// Bit-field insert
BFI,
@@ -222,21 +224,7 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD,
-
- // 64-bit atomic ops (value split into two registers)
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG,
- ATOMMIN64_DAG,
- ATOMUMIN64_DAG,
- ATOMMAX64_DAG,
- ATOMUMAX64_DAG
+ VST4LN_UPD
};
}
@@ -270,7 +258,7 @@ namespace llvm {
}
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
- virtual EVT getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -298,6 +286,9 @@ namespace llvm {
using TargetLowering::isZExtFree;
virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+ virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const;
+
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -349,7 +340,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
+ MVT VT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -372,6 +363,12 @@ namespace llvm {
/// be used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -412,21 +409,21 @@ namespace llvm {
void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
- void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+ void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
- SmallVector<SDValue, 8> &MemOpChains,
+ SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const;
SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) const;
+ SDLoc dl) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -457,13 +454,26 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ ///
+ /// ARM supports both fused and unfused multiply-add operations; we already
+ /// lower a pair of fmul and fadd to the latter so it's not clear that there
+ /// would be a gain or that the gain would be worthwhile enough to risk
+ /// correctness bugs.
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; }
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
@@ -471,24 +481,26 @@ namespace llvm {
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
+ unsigned ArgSize,
bool ForceMutable) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
+ SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
unsigned InRegsParamRecordIdx,
+ unsigned ArgSize,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize) const;
@@ -522,16 +534,16 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const;
virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, DebugLoc dl) const;
+ SelectionDAG &DAG, SDLoc dl) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
@@ -556,6 +568,8 @@ namespace llvm {
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 67a6820932fc..f93504fddb8e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -155,6 +155,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
let DecoderMethod = "DecodePredicateOperand";
}
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+ ComplexPattern<i32, 2, "SelectCMOVPred"> {
+ let MIOperandInfo = (ops i32imm, i32imm);
+ let PrintMethod = "printPredicateOperand";
+}
+
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
@@ -237,6 +247,8 @@ class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
@@ -490,8 +502,7 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-// Atomic load/store instructions
-class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIldr_ex_or_acq<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
@@ -502,23 +513,52 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{20} = 1;
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
- let Inst{11-0} = 0b111110011111;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-0} = 0b10011111;
}
-class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
- bits<4> Rd;
bits<4> Rt;
bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 0;
let Inst{19-16} = addr;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b11111001;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-4} = 0b1001;
let Inst{3-0} = Rt;
}
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b11, oops, iops, itin, opc, asm, pattern>;
+
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b11, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
+// Exclusive load/store instructions
+
+class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
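To make the shared encoding explicit (editor's sketch; ExFlavor and encodeOpcod2 are invented names): bits {9-8} (opcod2) select among the three families layered on AIldr_ex_or_acq / AIstr_ex_or_rel in the classes above.

#include <cassert>
#include <cstdint>

// Bits {9-8} (opcod2) in the shared load/store-exclusive encoding.
enum class ExFlavor : uint8_t {
  Exclusive        = 0b11, // AIldrex / AIstrex   (ldrex, strex)
  AcquireExclusive = 0b10, // AIldaex / AIstlex   (ldaex, stlex, ARMv8)
  AcquireRelease   = 0b00, // AIldracq / AIstrrel (lda, stl, ARMv8)
};

uint32_t encodeOpcod2(ExFlavor F) {
  return static_cast<uint32_t>(F) << 8; // lands in Inst{9-8}
}

int main() {
  assert(encodeOpcod2(ExFlavor::Exclusive) == 0x300u);
  return 0;
}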
class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
: AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
bits<4> Rt;
@@ -535,6 +575,18 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Unpredictable{11-8} = 0b1111;
let DecoderMethod = "DecodeSwap";
}
+// Acquire/Release load/store instructions
+class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ let Inst{15-12} = 0b1111;
+}
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1230,8 +1282,9 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
: Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
// Move to/from coprocessor instructions
-class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
- : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+class T2Cop<bits<4> opc, dag oops, dag iops, string opcstr, string asm,
+ list<dag> pattern>
+ : T2I <oops, iops, NoItinerary, opcstr, asm, pattern>, Requires<[IsThumb2]> {
let Inst{31-28} = opc;
}
@@ -1389,7 +1442,6 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let Inst{15-12} = Dd{3-0};
let Inst{7-0} = addr{7-0}; // imm8
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-9} = 0b101;
@@ -1415,7 +1467,6 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let Inst{15-12} = Sd{4-1};
let Inst{7-0} = addr{7-0}; // imm8
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-9} = 0b101;
@@ -1437,6 +1488,28 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
}
// Load / store multiple
+
+// Unknown precision
+class AXXI4<dag oops, dag iops, IndexMode im,
+ string asm, string cstr, list<dag> pattern>
+ : VFPXI<oops, iops, AddrMode4, 4, im,
+ VFPLdStFrm, NoItinerary, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = 0;
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-1} = regs{7-1};
+
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1011;
+ let Inst{0} = 1;
+}
+
+// Double precision
class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, 4, im,
@@ -1449,14 +1522,15 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{19-16} = Rn;
let Inst{22} = regs{12};
let Inst{15-12} = regs{11-8};
- let Inst{7-0} = regs{7-0};
+ let Inst{7-1} = regs{7-1};
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-9} = 0b101;
let Inst{8} = 1; // Double precision
+ let Inst{0} = 0;
}
+// Single precision
class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, 4, im,
@@ -1471,7 +1545,6 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{15-12} = regs{12-9};
let Inst{7-0} = regs{7-0};
- // TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-9} = 0b101;
let Inst{8} = 0; // Single precision
@@ -1499,6 +1572,34 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{8} = 1; // Double precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// Double precision, unary, not-predicated
+class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPUnaryFrm, itin, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
}
// Double precision, binary
@@ -1525,9 +1626,42 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{8} = 1; // Double precision
let Inst{6} = op6;
let Inst{4} = op4;
+
+ let Predicates = [HasVFP2, HasDPVFP];
+}
+
+// FP, binary, not predicated
+class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
+ InstrItinClass itin, string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPBinaryFrm, itin,
+ asm, "", pattern>
+{
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = opcod3;
+ let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
-// Single precision, unary
+// Single precision, unary, predicated
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
@@ -1551,6 +1685,33 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{4} = opcod5;
}
+// Single precision, unary, non-predicated
+class ASuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPUnaryFrm, itin, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
// Single precision unary, if no NEON. Same as ASuI except not available if
// NEON is enabled.
class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
@@ -1586,6 +1747,35 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{4} = op4;
}
+// Single precision, binary, not predicated
+class ASbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
+ InstrItinClass itin, string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPBinaryFrm, itin, asm, "", pattern>
+{
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = opcod3;
+ let Inst{4} = 0;
+}
+
// Single precision binary, if no NEON. Same as ASbI except not available if
// NEON is enabled.
class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
@@ -1698,6 +1888,21 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
let DecoderNamespace = "NEON";
}
+// Same as NeonI except it is not predicated
+class NeonInp<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, ".", dt, "\t", asm);
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+ let DecoderNamespace = "NEON";
+
+ let Inst{31-28} = 0b1111;
+}
+
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
@@ -1817,6 +2022,35 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{5} = Vm{4};
}
+// Same as N2V but not predicated.
+class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm", "", pattern> {
+ bits<5> Vd;
+ bits<5> Vm;
+
+ // Encode instruction operands
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+
+ // Encode constant bits
+ let Inst{27-23} = 0b00111;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = op19_18;
+ let Inst{17-16} = op17_16;
+ let Inst{11} = 0;
+ let Inst{10-8} = op10_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+
+ let DecoderNamespace = "NEON";
+}
+
// Same as N2V except it doesn't have a datatype suffix.
class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
@@ -1898,6 +2132,32 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{5} = Vm{4};
}
+class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, dag oops, dag iops,Format f, InstrItinClass itin,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr,
+ Dt, "$Vd, $Vn, $Vm", "", pattern> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ // Encode instruction operands
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+
+ // Encode constant bits
+ let Inst{27-23} = op27_23;
+ let Inst{21-20} = op21_20;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 80f0ec74376a..df867b49ab57 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -29,7 +30,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
@@ -106,29 +107,42 @@ namespace {
if (TM->getRelocationModel() != Reloc::PIC_)
return false;
- LLVMContext* Context = &MF.getFunction()->getContext();
- GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
- GlobalValue::ExternalLinkage, 0,
- "_GLOBAL_OFFSET_TABLE_");
- unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
- unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType());
+ LLVMContext *Context = &MF.getFunction()->getContext();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
+ *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
+
+ unsigned Align = TM->getDataLayout()
+ ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned TempReg =
+ MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
ARM::t2LDRpci : ARM::LDRcp;
const TargetInstrInfo &TII = *TM->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), GlobalBaseReg)
+ TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
AddDefaultPred(MIB);
+ // Fix the GOT address by adding pc.
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD
+ : ARM::PICADD;
+ MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (Opc == ARM::PICADD)
+ AddDefaultPred(MIB);
+
+
return true;
}
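Roughly, the rewritten global-base setup emits this two-instruction shape (editor's illustration of the ARM-mode sequence, not verbatim compiler output; label and register names are invented):

// Editor's illustration of the sequence constructed above (ARM mode):
//
//   ldr  rT, .LCPI          @ constant pool entry holds
//                           @ _GLOBAL_OFFSET_TABLE_ - (.Lpic + 8)
// .Lpic:
//   add  rGOT, pc, rT       @ PICADD: pc + offset = GOT base
//
// PCAdj (8 in ARM mode, 4 in Thumb) is the pipeline offset the constant
// pool entry must compensate for.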
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 1bd174e34162..2042c0460932 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -118,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
+def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -162,8 +166,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
-def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
@@ -174,9 +176,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
+def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -189,11 +193,18 @@ def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
AssemblerPredicate<"HasV6Ops", "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
+ AssemblerPredicate<"HasV6MOps",
+ "armv6m or armv6t2">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
+def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -201,14 +212,23 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
AssemblerPredicate<"FeatureVFP4", "VFP4">;
+def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
+ AssemblerPredicate<"!FeatureVFPOnlySP",
+ "double precision VFP">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide">;
+ AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
- AssemblerPredicate<"FeatureHWDivARM">;
+ AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -233,10 +253,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
AssemblerPredicate<"ModeThumb,FeatureThumb2",
"thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass", "armv7m">;
-def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass",
- "armv7a/r">;
+ "!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb", "arm-mode">;
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
@@ -258,7 +278,9 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">;
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
-def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
+def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast &&"
+ " Subtarget->hasVFP4()) || "
"Subtarget->isTargetDarwin()">;
// VGETLNi32 is microcoded on Swift - prefer VMOV.
@@ -275,8 +297,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
-def IsLE : Predicate<"TLI.isLittleEndian()">;
-def IsBE : Predicate<"TLI.isBigEndian()">;
+def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">;
+def IsBE : Predicate<"getTargetLowering()->isBigEndian()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -456,7 +478,7 @@ def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
def adrlabel : Operand<i32> {
let EncoderMethod = "getAdrLabelOpValue";
let ParserMatchClass = AdrLabelAsmOperand;
- let PrintMethod = "printAdrLabelOperand";
+ let PrintMethod = "printAdrLabelOperand<0>";
}
def neon_vcvt_imm32 : Operand<i32> {
@@ -581,17 +603,6 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
-/// imm0_4 predicate - Immediate in the range [0,4].
-def Imm0_4AsmOperand : ImmAsmOperand
-{
- let Name = "Imm0_4";
- let DiagnosticType = "ImmRange0_4";
-}
-def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
- let ParserMatchClass = Imm0_4AsmOperand;
- let DecoderMethod = "DecodeImm0_4";
-}
-
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -671,6 +682,15 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63AsmOperand;
}
+/// imm0_239 predicate - Immediate in the range [0,239].
+def Imm0_239AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_239";
+ let DiagnosticType = "ImmRange0_239";
+}
+def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
+ let ParserMatchClass = Imm0_239AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -702,6 +722,11 @@ def imm0_65535_expr : Operand<i32> {
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
+def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def imm256_65535_expr : Operand<i32> {
+ let ParserMatchClass = Imm256_65535ExprAsmOperand;
+}
+
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
@@ -1007,11 +1032,6 @@ def p_imm : Operand<i32> {
let DecoderMethod = "DecodeCoprocessor";
}
-def pf_imm : Operand<i32> {
- let PrintMethod = "printPImmediate";
- let ParserMatchClass = CoprocNumAsmOperand;
-}
-
def CoprocRegAsmOperand : AsmOperandClass {
let Name = "CoprocReg";
let ParserMethod = "parseCoprocRegOperand";
@@ -1327,7 +1347,7 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
[(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
bits<4> Rd;
bits<4> Rm;
bits<2> rot;
@@ -1340,11 +1360,11 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode>
class AI_ext_rrot_np<bits<8> opcod, string opc>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> {
bits<2> rot;
let Inst{19-16} = 0b1111;
let Inst{11-10} = rot;
-}
+ }
/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
@@ -1353,7 +1373,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot",
[(set GPRnopc:$Rd, (opnode GPR:$Rn,
(rotr GPRnopc:$Rm, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -1368,7 +1388,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode>
class AI_exta_rrot_np<bits<8> opcod, string opc>
: AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot),
IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> {
bits<4> Rn;
bits<2> rot;
let Inst{19-16} = Rn;
@@ -1664,53 +1684,11 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These pseudos use a hand-written selection code).
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
-def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2),
- NoItinerary, []>;
-def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-}
-
-def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<3> imm;
- let Inst{27-3} = 0b0011001000001111000000000;
- let Inst{2-0} = imm;
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -1718,6 +1696,9 @@ def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
+
+def : Pat<(int_arm_sevl), (HINT 5)>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1735,12 +1716,23 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
// The 16-bit operand $val can be used by a debugger to store more information
// about the breakpoint.
-def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
- "bkpt", "\t$val", []>, Requires<[IsARM]> {
+def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
bits<16> val;
let Inst{3-0} = val{3-0};
let Inst{19-8} = val{15-4};
let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0xe; // AL
+ let Inst{7-4} = 0b0111;
+}
+
+def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010000;
+ let Inst{31-28} = 0xe; // AL
let Inst{7-4} = 0b0111;
}
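
// A sketch of the two encodings, derived from the fields above (AInoP pins
// the condition to AL, 0b1110):
//   bkpt #0  ->  0xE1200070   (Inst{27-20} = 0b00010010)
//   hlt #0   ->  0xE1000070   (Inst{27-20} = 0b00010000)
// HLT is ARMv8's halting-debug breakpoint; BKPT keeps its encoding but is
// now modelled as unpredicated.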
@@ -1780,7 +1772,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$addr"),
- [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-26} = 0b111101;
@@ -1796,7 +1789,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$shift"),
- [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>,
+ Sched<[WritePreLd]> {
bits<17> shift;
let Inst{31-26} = 0b111101;
let Inst{25} = 1; // 1 for register form
@@ -1816,7 +1810,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
- "setend\t$end", []>, Requires<[IsARM]> {
+ "setend\t$end", []>, Requires<[IsARM]>, Deprecated<HasV8Ops> {
bits<1> end;
let Inst{31-10} = 0b1111000100000001000000;
let Inst{9} = end;
@@ -1863,7 +1857,8 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
4, IIC_iALUr,
- [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>,
+ Sched<[WriteALU, ReadALU]>;
let AddedComplexity = 10 in {
def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
@@ -1923,11 +1918,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
let hasSideEffects = 1 in {
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1938,16 +1933,22 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// ARMV4T and above
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001001011111111111100011110;
}
// ARMV4 only
def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"mov", "\tpc, lr", [(ARMretflag)]>,
- Requires<[IsARM, NoV4T]> {
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
+
+ // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets
+ // the user-space one).
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
+ 4, IIC_Br,
+ [(ARMintretflag imm:$offset)]>;
}
// Indirect branches
@@ -1955,7 +1956,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// ARMV4T and above
def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
[(brind GPR:$dst)]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
bits<4> dst;
let Inst{31-4} = 0b1110000100101111111111110001;
let Inst{3-0} = dst;
@@ -1963,7 +1964,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
"bx", "\t$dst", [/* pattern left blank */]>,
- Requires<[IsARM, HasV4T]> {
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
bits<4> dst;
let Inst{27-4} = 0b000100101111111111110001;
let Inst{3-0} = dst;
@@ -1980,7 +1981,7 @@ let isCall = 1,
def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
@@ -1990,7 +1991,7 @@ let isCall = 1,
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{23-0} = func;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -2000,7 +2001,7 @@ let isCall = 1,
def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
@@ -2009,7 +2010,7 @@ let isCall = 1,
def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
@@ -2019,18 +2020,18 @@ let isCall = 1,
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T]>;
+ Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T]>;
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -2038,7 +2039,8 @@ let isBranch = 1, isTerminator = 1 in {
// a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
IIC_Br, "b", "\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>,
+ Sched<[WriteBr]> {
bits<24> target;
let Inst{23-0} = target;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -2051,25 +2053,27 @@ let isBranch = 1, isTerminator = 1 in {
// should be sufficient.
// FIXME: Is B really a Barrier? That doesn't seem right.
def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
- [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
+ Sched<[WriteBr]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
(ins GPR:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBr]>;
// FIXME: This shouldn't use the generic "addrmode2," but rather be split
// into i12 and rs suffixed versions.
def BR_JTm : ARMPseudoInst<(outs),
(ins addrmode2:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
- imm:$id)]>;
+ imm:$id)]>, Sched<[WriteBrTbl]>;
def BR_JTadd : ARMPseudoInst<(outs),
(ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
0, IIC_Br,
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
- imm:$id)]>;
+ imm:$id)]>, Sched<[WriteBrTbl]>;
} // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
@@ -2078,7 +2082,7 @@ let isBranch = 1, isTerminator = 1 in {
// BLX (immediate)
def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
"blx\t$target", []>,
- Requires<[IsARM, HasV5T]> {
+ Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
let Inst{31-25} = 0b1111101;
bits<25> target;
let Inst{23-0} = target{24-1};
@@ -2087,7 +2091,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
// Branch and Exchange Jazelle
def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>, Sched<[WriteBr]> {
bits<4> func;
let Inst{23-20} = 0b0010;
let Inst{19-8} = 0xfff;
@@ -2098,18 +2102,20 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>;
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>,
+ Sched<[WriteBr]>;
def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
4, IIC_Br, [],
(Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteBr]>;
def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
- (BX GPR:$dst)>,
+ (BX GPR:$dst)>, Sched<[WriteBr]>,
Requires<[IsARM]>;
}
@@ -2123,7 +2129,8 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
-def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> {
+def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>,
+ Sched<[WriteBr]> {
bits<24> svc;
let Inst{23-0} = svc;
}
@@ -2272,6 +2279,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
[]>, Requires<[IsARM, HasV5TE]>;
}
+def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "lda", "\t$Rt, $addr", []>;
+def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldab", "\t$Rt, $addr", []>;
+def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldah", "\t$Rt, $addr", []>;
+
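+// Note: LDA/LDAB/LDAH are the ARMv8 load-acquire instructions (class
+// AIldracq); their store-release counterparts STL/STLB/STLH are added
+// beside the ordinary stores below, and selection patterns for both sets
+// follow the exclusive-load/store definitions.
+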
// Indexed loads
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
@@ -2284,7 +2298,6 @@ multiclass AI2_ldridx<bit isByte, string opc,
let Inst{19-16} = addr{16-13};
let Inst{11-0} = addr{11-0};
let DecoderMethod = "DecodeLDRPreImm";
- let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12";
}
def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
@@ -2297,7 +2310,6 @@ multiclass AI2_ldridx<bit isByte, string opc,
let Inst{11-0} = addr{11-0};
let Inst{4} = 0;
let DecoderMethod = "DecodeLDRPreReg";
- let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2";
}
def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
@@ -2355,7 +2367,6 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3";
let DecoderMethod = "DecodeAddrMode3Instruction";
}
def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
@@ -2391,7 +2402,6 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
let DecoderMethod = "DecodeAddrMode3Instruction";
- let AsmMatchConverter = "cvtLdrdPre";
}
def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am3offset:$offset),
@@ -2494,7 +2504,6 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{22} = 1;
let Inst{11-8} = offset{7-4};
let Inst{3-0} = offset{3-0};
- let AsmMatchConverter = "cvtLdExtTWriteBackImm";
}
def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
(ins addr_offset_none:$addr, postidx_reg:$Rm),
@@ -2506,7 +2515,6 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{11-8} = 0;
let Unpredictable{11-8} = 0b1111;
let Inst{3-0} = Rm{3-0};
- let AsmMatchConverter = "cvtLdExtTWriteBackReg";
let DecoderMethod = "DecodeLDR";
}
}
@@ -2544,7 +2552,6 @@ multiclass AI2_stridx<bit isByte, string opc,
let Inst{23} = addr{12}; // U (add = ('U' == 1))
let Inst{19-16} = addr{16-13}; // Rn
let Inst{11-0} = addr{11-0}; // imm12
- let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12";
let DecoderMethod = "DecodeSTRPreImm";
}
@@ -2558,7 +2565,6 @@ multiclass AI2_stridx<bit isByte, string opc,
let Inst{19-16} = addr{16-13}; // Rn
let Inst{11-0} = addr{11-0};
let Inst{4} = 0; // Inst{4} = 0
- let AsmMatchConverter = "cvtStWriteBackRegAddrMode2";
let DecoderMethod = "DecodeSTRPreReg";
}
def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
@@ -2667,7 +2673,6 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
let Inst{19-16} = addr{12-9}; // Rn
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
- let AsmMatchConverter = "cvtStWriteBackRegAddrMode3";
let DecoderMethod = "DecodeAddrMode3Instruction";
}
@@ -2701,7 +2706,6 @@ def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
let Inst{11-8} = addr{7-4}; // imm7_4/zero
let Inst{3-0} = addr{3-0}; // imm3_0/Rm
let DecoderMethod = "DecodeAddrMode3Instruction";
- let AsmMatchConverter = "cvtStrdPre";
}
def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
@@ -2808,7 +2812,6 @@ multiclass AI3strT<bits<4> op, string opc> {
let Inst{22} = 1;
let Inst{11-8} = offset{7-4};
let Inst{3-0} = offset{3-0};
- let AsmMatchConverter = "cvtStExtTWriteBackImm";
}
def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb),
(ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm),
@@ -2819,13 +2822,18 @@ multiclass AI3strT<bits<4> op, string opc> {
let Inst{22} = 0;
let Inst{11-8} = 0;
let Inst{3-0} = Rm{3-0};
- let AsmMatchConverter = "cvtStExtTWriteBackReg";
}
}
defm STRHT : AI3strT<0b1011, "strht">;
+def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stl", "\t$Rt, $addr", []>;
+def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlb", "\t$Rt, $addr", []>;
+def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlh", "\t$Rt, $addr", []>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
@@ -2955,7 +2963,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
- "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
@@ -2969,7 +2977,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
- IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
@@ -2982,7 +2990,8 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
DPSoRegRegFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src",
- [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+ [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
@@ -2998,7 +3007,7 @@ def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
DPSoRegImmFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
- UnaryDP {
+ UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
@@ -3011,7 +3020,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
- "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -3025,7 +3035,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set GPR:$Rd, imm0_65535:$imm)]>,
- Requires<[IsARM, HasV6T2]>, UnaryDP {
+ Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<16> imm;
let Inst{15-12} = Rd;
@@ -3041,7 +3051,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
Requires<[IsARM]>;
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
let Constraints = "$src = $Rd" in {
def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
@@ -3051,7 +3062,7 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> {
bits<4> Rd;
bits<16> imm;
let Inst{15-12} = Rd;
@@ -3063,7 +3074,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
}
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
- (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
} // Constraints
@@ -3073,7 +3085,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
let Uses = [CPSR] in
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
[(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
- Requires<[IsARM]>;
+ Requires<[IsARM]>, Sched<[WriteALU]>;
// These aren't really mov instructions, but we have to define them this way
// due to flag operands.
@@ -3081,10 +3093,10 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
- Requires<[IsARM]>;
+ Sched<[WriteALU]>, Requires<[IsARM]>;
def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
- Requires<[IsARM]>;
+ Sched<[WriteALU]>, Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
@@ -3250,7 +3262,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
list<dag> pattern = [],
dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
- : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rd;
bits<4> Rm;
@@ -3265,9 +3278,11 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
// Saturating add/subtract
+let DecoderMethod = "DecodeQADDInstruction" in
def QADD : AAI<0b00010000, 0b00000101, "qadd",
[(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
(ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+
def QSUB : AAI<0b00010010, 0b00000101, "qsub",
[(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
(ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
@@ -3326,7 +3341,7 @@ def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
"\t$Rd, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3340,7 +3355,7 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3473,7 +3488,7 @@ def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
"mvn", "\t$Rd, $Rm",
- [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<4> Rm;
let Inst{25} = 0;
@@ -3484,7 +3499,8 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
}
def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+ [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
@@ -3496,7 +3512,8 @@ def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
}
def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
- [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
+ [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP,
+ Sched<[WriteALU]> {
bits<4> Rd;
bits<12> shift;
let Inst{25} = 0;
@@ -3511,7 +3528,7 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
IIC_iMVNi, "mvn", "\t$Rd, $imm",
- [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ [(set GPR:$Rd, so_imm_not:$imm)]>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -3993,14 +4010,58 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
+// We also cannot replace an srl (17..31) with the arithmetic shift we would
+// use in pkhtb src1, src2, asr (17..31).
def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
- (srl GPRnopc:$src2, imm16_31:$sh)),
+ (srl GPRnopc:$src2, imm16:$sh)),
+ (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+ (sra GPRnopc:$src2, imm16_31:$sh)),
(PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
(and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
(PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
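
// Worked example for the split above: for a shift of exactly 16,
// (srl x, 16) and (sra x, 16) agree in bits 15..0, so pkhtb's asr form is
// usable; for 17..31 a logical srl zero-fills bits that asr would
// sign-fill, so only the sra pattern may use the shifted form, and an
// srl by 17..31 is left unmatched.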
//===----------------------------------------------------------------------===//
+// CRC Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class AI_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary,
+ !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV8, HasCRC]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-28} = 0b1110;
+ let Inst{27-23} = 0b00010;
+ let Inst{22-21} = sz;
+ let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9} = C;
+ let Inst{8} = 0;
+ let Inst{7-4} = 0b0100;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1101;
+}
+
+def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>;
+def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>;
+def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
+def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
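+
+// Sketch of intended use (assuming the usual int_arm_* to llvm.arm.*
+// intrinsic naming): IR such as
+//   %r = call i32 @llvm.arm.crc32b(i32 %acc, i32 %data)
+// should select straight to a single crc32b, folding the low byte of
+// %data into the running CRC over the 0x04C11DB7 polynomial.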
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4022,7 +4083,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
let isCompare = 1, Defs = [CPSR] in {
def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
"cmn", "\t$Rn, $imm",
- [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+ [(ARMcmn GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
@@ -4038,7 +4100,7 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, GPR:$Rm)]> {
+ GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = 1;
@@ -4056,7 +4118,8 @@ def CMNzrsi : AI1<0b1011, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPR:$Rn, so_reg_imm:$shift)]> {
+ GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -4074,7 +4137,8 @@ def CMNzrsr : AI1<0b1011, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr,
"cmn", "\t$Rn, $shift",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -4112,65 +4176,77 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
def BCCi64 : PseudoInst<(outs),
(ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
IIC_Br,
- [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>,
+ Sched<[WriteBr]>;
def BCCZi64 : PseudoInst<(outs),
(ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
- [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>,
+ Sched<[WriteBr]>;
} // usesCustomInserter
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
-def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, GPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_imm:$shift, pred:$p),
- 4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
- imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd,
+ (ARMcmov GPR:$false, so_reg_imm:$shift,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+ (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
- imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
- 4, IIC_iMOVi,
- []>,
- RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+def MOVCCi16
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
-def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm:$src, pred:$p),
- 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
} // neverHasSideEffects
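
// A note on the cmovpred change: the old pred operand expands to a pair
// (condition code plus the predicating CPSR use), which a dag pattern
// cannot bind to one node operand; cmovpred packages the pair so the
// ARMcmov patterns above become expressible in TableGen, replacing the
// commented-out patterns and the FIXME removed in this hunk.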
@@ -4189,10 +4265,20 @@ def memb_opt : Operand<i32> {
let DecoderMethod = "DecodeMemBarrierOption";
}
+def InstSyncBarrierOptOperand : AsmOperandClass {
+ let Name = "InstSyncBarrierOpt";
+ let ParserMethod = "parseInstSyncBarrierOptOperand";
+}
+def instsyncb_opt : Operand<i32> {
+ let PrintMethod = "printInstSyncBOption";
+ let ParserMatchClass = InstSyncBarrierOptOperand;
+ let DecoderMethod = "DecodeInstSyncBarrierOption";
+}
+
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff05;
@@ -4201,7 +4287,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dsb", "\t$opt", []>,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff04;
@@ -4209,7 +4295,7 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
// ISB has only full system option
-def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
"isb", "\t$opt", []>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
@@ -4217,124 +4303,219 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
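
// Sketch of the barrier plumbing, assuming the standard option encoding
// (sy = 0b1111, ish = 0b1011, ...): the patterns tie the intrinsics to the
// instructions directly, so IR like
//   call void @llvm.arm.dmb(i32 11)
// should assemble to "dmb ish". ISB takes the separate instsyncb_opt
// operand because it accepts only the full-system option.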
+let usesCustomInserter = 1, Defs = [CPSR] in {
+
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in
-def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+ def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-let usesCustomInserter = 1 in {
- let Defs = [CPSR] in {
+// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
+// (64-bit pseudos use hand-written selection code).
+ let mayLoad = 1, mayStore = 1 in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
-
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
-
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
-}
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_ADD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_SUB_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_AND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_OR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_XOR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_NAND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2, i32imm:$ordering),
+ NoItinerary, []>;
+ }
+ let mayLoad = 1 in
+ def ATOMIC_LOAD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, i32imm:$ordering),
+ NoItinerary, []>;
+ let mayStore = 1 in
+ def ATOMIC_STORE_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
}
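
// A note on the rewrite above: the pseudos lose their ISel patterns and
// instead carry an explicit i32imm:$ordering operand, presumably so the
// custom inserter can pick the fencing (or the v8 ldaex/stlex forms) for
// the requested AtomicOrdering; the old ATOM*6432 pseudos are subsumed by
// the ATOMIC_*_I64 definitions here.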
let usesCustomInserter = 1 in {
@@ -4344,48 +4525,147 @@ let usesCustomInserter = 1 in {
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
+def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def strex_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def strex_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def strex_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_arm_strex node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
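+// Sketch: these PatFrags dispatch on the intrinsic node's memory type, so
+// one overloaded intrinsic covers every width. Assuming the usual naming,
+//   %v = call i32 @llvm.arm.ldrex.p0i8(i8* %p)
+// selects LDREXB below, while the i16 and i32 forms select LDREXH and
+// LDREX.
+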
let mayLoad = 1 in {
def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
- NoItinerary,
- "ldrexb", "\t$Rt, $addr", []>;
+ NoItinerary, "ldrexb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
- NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+ NoItinerary, "ldrexh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
- NoItinerary, "ldrex", "\t$Rt, $addr", []>;
+ NoItinerary, "ldrex", "\t$Rt, $addr",
+ [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
+
+def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexb", "\t$Rt, $addr", []>;
+def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexh", "\t$Rt, $addr", []>;
+def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldaexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
- NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>;
def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
- NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>;
def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
- NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr",
+ [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>;
let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
(ins GPRPairOp:$Rt, addr_offset_none:$addr),
NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexb", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexh", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlex", "\t$Rd, $Rt, $addr",
+ []>;
+let hasExtraSrcRegAllocReq = 1 in
+def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
}
-
-def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [(int_arm_clrex)]>,
Requires<[IsARM, HasV7]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
+def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff),
+ (LDREXB addr_offset_none:$addr)>;
+def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff),
+ (LDREXH addr_offset_none:$addr)>;
+def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (STREXB GPR:$Rt, addr_offset_none:$addr)>;
+def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (STREXH GPR:$Rt, addr_offset_none:$addr)>;
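+
+// Why the four patterns above are sound: ldrexb/ldrexh zero-extend into
+// the 32-bit result, so masking with 0xff/0xffff afterwards is redundant,
+// and strexb/strexh store only the low byte/halfword, so a mask on the
+// stored value is dead either way.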
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+
+let AddedComplexity = 8 in {
+ def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
+}
+
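+// Sketch: with AddedComplexity = 8 these patterns win over the generic
+// atomic lowering, so on ARMv8 an acquire load such as
+//   %v = load atomic i32* %p acquire, align 4
+// should select directly to "lda", and a release store to "stl", with no
+// separate barrier.
+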
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+ Requires<[PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+ Requires<[PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4396,7 +4676,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4413,11 +4694,12 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{23-20} = opc1;
}
-def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1,
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -4595,10 +4877,10 @@ defm LDC : LdStCop <1, 0, "ldc">;
defm LDCL : LdStCop <1, 1, "ldcl">;
defm STC : LdStCop <0, 0, "stc">;
defm STCL : LdStCop <0, 1, "stcl">;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
-defm STC2 : LdSt2Cop<0, 0, "stc2">;
-defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;
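+
+// A recurring change in this file: CDP/CDP2, the LDC2/STC2 family, the
+// MCR2/MRC2 and MCRR2/MRRC2 moves, and SWP/SWPB all gain
+// Requires<[PreV8]>, since ARMv8 removes or reworks these generic
+// coprocessor and swap encodings.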
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -4631,16 +4913,17 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt),
+ (outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
- (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
@@ -4650,7 +4933,7 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
: ABXI<0b1110, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
- let Inst{31-28} = 0b1111;
+ let Inst{31-24} = 0b11111110;
let Inst{20} = direction;
let Inst{4} = 1;
@@ -4674,16 +4957,18 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt),
+ (outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
- imm0_7:$opc2), []>;
+ imm0_7:$opc2), []>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
- (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
@@ -4718,7 +5003,8 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
- !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -4820,7 +5106,7 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let isCall = 1,
Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
- [(set R0, ARMthread_pointer)]>;
+ [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
}
//===----------------------------------------------------------------------===//
@@ -4884,7 +5170,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
4, IIC_Br, [(brind GPR:$dst)],
(MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
- Requires<[IsARM, NoV4T]>;
+ Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
// Large immediate handling.
@@ -5153,10 +5439,10 @@ def : MnemonicAlias<"rfeed", "rfeib">;
def : MnemonicAlias<"rfe", "rfeia">;
// SRS aliases
-def : MnemonicAlias<"srsfa", "srsda">;
-def : MnemonicAlias<"srsea", "srsdb">;
-def : MnemonicAlias<"srsfd", "srsia">;
-def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srsfa", "srsib">;
+def : MnemonicAlias<"srsea", "srsia">;
+def : MnemonicAlias<"srsfd", "srsdb">;
+def : MnemonicAlias<"srsed", "srsda">;
def : MnemonicAlias<"srs", "srsia">;
// QSAX == QSUBADDX
@@ -5233,7 +5519,7 @@ def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
cc_out:$s)>;
}
def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
- (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+ (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>;
let TwoOperandAliasConstraint = "$Rn = $Rd" in {
def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
(ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
@@ -5269,4 +5555,5 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+ ComplexDeprecationPredicate<"IT">;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 896fd0f7850c..43bd4c21dc39 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -626,7 +626,7 @@ class VLD1D<bits<4> op7_4, string Dt>
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
@@ -634,7 +634,7 @@ class VLD1Q<bits<4> op7_4, string Dt>
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
@@ -655,16 +655,14 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
@@ -674,16 +672,14 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -703,7 +699,7 @@ class VLD1D3<bits<4> op7_4, string Dt>
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
@@ -712,16 +708,14 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -744,7 +738,7 @@ class VLD1D4<bits<4> op7_4, string Dt>
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
@@ -753,16 +747,14 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -786,7 +778,7 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
@@ -810,16 +802,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
@@ -853,7 +843,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
@@ -872,7 +862,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
@@ -912,7 +902,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
@@ -931,7 +921,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
@@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
@@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
@@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
@@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
- let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
@@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
- let AsmMatchConverter = "cvtVLDwbRegister";
}
}
@@ -1580,14 +1564,14 @@ class VST1D<bits<4> op7_4, string Dt>
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def VST1d8 : VST1D<{0,0,0,?}, "8">;
@@ -1608,8 +1592,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
@@ -1617,8 +1600,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
@@ -1628,8 +1610,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
@@ -1637,8 +1618,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -1659,7 +1639,7 @@ class VST1D3<bits<4> op7_4, string Dt>
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
@@ -1668,8 +1648,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
@@ -1677,8 +1656,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -1704,7 +1682,7 @@ class VST1D4<bits<4> op7_4, string Dt>
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
@@ -1713,8 +1691,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
@@ -1722,8 +1699,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST1Instruction";
}
}
@@ -1748,7 +1724,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
@@ -1772,16 +1748,14 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
@@ -1791,8 +1765,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbFixed";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
@@ -1800,8 +1773,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
- let AsmMatchConverter = "cvtVSTwbRegister";
+ let DecoderMethod = "DecodeVLDST2Instruction";
}
}
@@ -1835,7 +1807,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
@@ -1854,7 +1826,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST3Instruction";
}
def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
@@ -1894,7 +1866,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
"", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
@@ -1913,7 +1885,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+ let DecoderMethod = "DecodeVLDST4Instruction";
}
def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
@@ -2379,6 +2351,40 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+// Same as above, but not predicated.
+class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+
+class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
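+// The "np" (not-predicated) classes wrap N2Vnp, which carries no $p operand:
+// these v8 instructions live in the unconditional encoding space and so can
+// never execute conditionally, even inside an IT block.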
+
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -2541,6 +2547,16 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
+
+class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
@@ -2552,6 +2568,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))]> {
let isCommutable = 0;
}
+
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
@@ -2584,6 +2601,29 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
+
+class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
+ Dt, ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+ (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -2834,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -2889,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -3965,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
- int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
+def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
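+// These patterns spell out what the intrinsic computed: for 2n-bit input
+// lanes, vaddhn returns the high half trunc((Vn + Vm) >>u n), with n = 8,
+// 16 or 32. Matching the generic add/shift/trunc nodes also selects VADDHN
+// for plain IR that never used the intrinsic.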
+
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
@@ -4008,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfd DPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfq QPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
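+// Multiplication by a duplicated scalar becomes a by-lane multiply: the f32
+// is inserted into lane 0 of an otherwise undefined D register and
+// VMULslfd/VMULslfq read just that lane, saving an explicit VDUP.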
+
+
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -4053,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", NEONvmulls, 1>;
-defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", NEONvmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
- v8i16, v8i8, int_arm_neon_vmullp, 1>;
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "NEONData" in {
+ defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+ defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+ def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+ def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+ "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+ Requires<[HasV8, HasCrypto]>;
+}
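+// vmull.p64 is the 64x64 -> 128-bit carry-less (polynomial) multiply used by
+// GCM/GHASH; unlike the p8 form it is only available with the v8 Crypto
+// extension, hence the separate non-predicated VMULLp64 definition.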
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
@@ -4125,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlal", "s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+ "vqdmlal", "s", null_frag>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
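+// i.e. vqdmlal is decomposed as vqadd(acc, vqdmull(Vn, Vm)); expressing it
+// with the two component intrinsics lets IR that writes that combination
+// directly select the fused instruction.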
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4182,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+ "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
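+// Likewise vqdmlsl is matched as vqsub(acc, vqdmull(Vn, Vm)).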
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4248,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
- int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
+def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
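+// As with VADDHN above: vsubhn is trunc((Vn - Vm) >>u n), so the generic
+// sub/shift/trunc combination selects VSUBHN directly.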
+
// Vector Comparisons.
// VCEQ : Vector Compare Equal
@@ -4659,6 +4778,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+// VMAXNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+}
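+// vmaxnm implements the IEEE 754-2008 maxNum semantics: if exactly one
+// operand is a quiet NaN, the numeric operand is returned rather than NaN.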
+
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -4673,6 +4804,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
v4f32, v4f32, int_arm_neon_vmins, 1>;
+// VMINNM
+let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v2f32, v2f32, int_arm_neon_vminnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v4f32, v4f32, int_arm_neon_vminnm, 1>,
+ Requires<[HasV8, HasNEON]>;
+}
+
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
@@ -5015,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -5386,6 +5529,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
+// VCVT{A, N, P, M}
+multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
+ SDPatternOperator IntU> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
+ def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ }
+}
+
+defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
+defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
+defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
+defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
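+// Each defm expands to signed/unsigned D and Q variants, e.g. VCVTANSD is
+// "vcvta.s32.f32 Dd, Dm". The letter picks the rounding used for the
+// conversion (a = to nearest, ties away; n = to nearest, ties even;
+// p = toward +inf; m = toward -inf), independent of the FPSCR mode.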
+
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
@@ -5409,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+ (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+ (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+ (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+ (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+ (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+ (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+ (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+ (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
IIC_VUNAQ, "vcvt", "f16.f32",
@@ -5509,8 +5691,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
(Ty DPR:$Vm), imm:$index)))]> {
- bits<4> index;
- let Inst{11-8} = index{3-0};
+ bits<3> index;
+ let Inst{11} = 0b0;
+ let Inst{10-8} = index{2-0};
}
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
@@ -5525,14 +5708,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
}
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
- let Inst{11-8} = index{3-0};
+ let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
- let Inst{11-9} = index{2-0};
+ let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
- let Inst{11-10} = index{1-0};
+ let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
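+// A D-register VEXT selects among at most 8 source bytes, so the byte index
+// fits in 3 bits and Inst{11} must be zero; indexes 8-15 are only encodable
+// in the 128-bit Q form.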
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
@@ -5657,6 +5840,77 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+// VRINT : Vector Rounding
+multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
+ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+ def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f32",
+ v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+ let Inst{9-7} = op9_7;
+ }
+ }
+
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+}
+
+defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
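+// The suffix letter selects the rounding mode (n = nearest, ties even;
+// x = FPSCR mode, signaling inexact; a = nearest, ties away; z = toward
+// zero; m = toward -inf; p = toward +inf); the result stays floating-point.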
+
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "v8Crypto" in {
+ class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+ : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+ Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
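+// SHA1C/SHA1P/SHA1M differ only in the SHA-1 round function applied (choose,
+// parity, majority); all of the above require both HasV8 and HasCrypto.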
+
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
@@ -6697,12 +6951,17 @@ def VST4qWB_register_Asm_32 :
(ins VecListFourQ:$list, addrmode6:$addr,
rGPR:$Rm, pred:$p)>;
-// VMOV takes an optional datatype suffix
+// VMOV/VMVN take an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+ (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index ae7a5c00bd74..af5ef537b536 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -69,11 +69,6 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
-// ADR instruction labels.
-def t_adrlabel : Operand<i32> {
- let EncoderMethod = "getThumbAdrLabelOpValue";
-}
-
// Scaled 4 immediate.
def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
def t_imm0_1020s4 : Operand<i32> {
@@ -97,12 +92,34 @@ def t_imm0_508s4_neg : Operand<i32> {
// Define Thumb specific addressing modes.
+// Unsigned 8-bit, 2-scaled memory offset.
+class OperandUnsignedOffset_b8s2 : AsmOperandClass {
+ let Name = "UnsignedOffset_b8s2";
+ let PredicateMethod = "isUnsignedOffset<8, 2>";
+}
+
+def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2;
+
+// Thumb-style PC-relative operand: signed, 8 bits of magnitude, two bits
+// of shift. Can be represented as [pc, #imm], as a bare #imm, or as a
+// relocatable expression.
+def ThumbMemPC : AsmOperandClass {
+ let Name = "ThumbMemPC";
+}
+
let OperandType = "OPERAND_PCREL" in {
def t_brtarget : Operand<OtherVT> {
let EncoderMethod = "getThumbBRTargetOpValue";
let DecoderMethod = "DecodeThumbBROperand";
}
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand<2>";
+ let ParserMatchClass = UnsignedOffset_b8s2;
+}
+
def t_bcctarget : Operand<i32> {
let EncoderMethod = "getThumbBCCTargetOpValue";
let DecoderMethod = "DecodeThumbBCCTargetOperand";
@@ -122,6 +139,15 @@ def t_blxtarget : Operand<i32> {
let EncoderMethod = "getThumbBLXTargetOpValue";
let DecoderMethod = "DecodeThumbBLXOffset";
}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let DecoderMethod = "DecodeThumbAddrModePC";
+ let PrintMethod = "printThumbLdrLabelOperand";
+ let ParserMatchClass = ThumbMemPC;
+}
}
// t_addrmode_rr := reg + reg
@@ -218,14 +244,6 @@ def t_addrmode_sp : Operand<i32>,
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
-// t_addrmode_pc := <label> => pc + imm8 * 4
-//
-def t_addrmode_pc : Operand<i32> {
- let EncoderMethod = "getAddrModePCOpValue";
- let DecoderMethod = "DecodeThumbAddrModePC";
- let PrintMethod = "printThumbLdrLabelOperand";
-}
-
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
@@ -251,25 +269,26 @@ class T1SystemEncoding<bits<8> opc>
let Inst{7-0} = opc;
}
-def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
- T1SystemEncoding<0x00>, // A8.6.110
- Requires<[IsThumb2]>;
-
-def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
- T1SystemEncoding<0x10>, // A8.6.410
- Requires<[IsThumb2]>;
-
-def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
- T1SystemEncoding<0x20>, // A8.6.408
- Requires<[IsThumb2]>;
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>,
+ T1SystemEncoding<0x00>,
+ Requires<[IsThumb, HasV6M]> {
+ bits<4> imm;
+ let Inst{7-4} = imm;
+}
-def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
- T1SystemEncoding<0x30>, // A8.6.409
- Requires<[IsThumb2]>;
+class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> {
+ let Predicates = [IsThumb, HasV6M];
+}
-def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
- T1SystemEncoding<0x40>, // A8.6.157
- Requires<[IsThumb2]>;
+def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110
+def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410
+def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408
+def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409
+def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
+def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
+def : T2Pat<(int_arm_sevl), (tHINT 5)>;
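+// nop/yield/wfe/wfi/sev are all encodings of the one tHINT instruction,
+// with the hint number in Inst{7-4}; e.g. "wfe" is "hint #2" (encoding
+// 0xbf20), and "sevl" (hint #5) is new in v8.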
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -282,8 +301,15 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
let Inst{7-0} = val;
}
+def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
+ []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> {
+ let Inst{9-6} = 0b1010;
+ bits<6> val;
+ let Inst{5-0} = val;
+}
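+// Unlike bkpt's 8-bit immediate, the v8 hlt halting breakpoint only has
+// room for a 6-bit immediate (0-63) in Inst{5-0}.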
+
def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
- []>, T1Encoding<0b101101> {
+ []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> {
bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
@@ -310,7 +336,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
let isNotDuplicable = 1, isCodeGenOnly = 1 in
def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.6
bits<3> dst;
let Inst{6-3} = 0b1111; // Rm = pc
@@ -323,7 +349,7 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
// probably because the instruction can be moved around.
def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
IIC_iALUi, "add", "\t$dst, $sp, $imm", []>,
- T1Encoding<{1,0,1,0,1,?}> {
+ T1Encoding<{1,0,1,0,1,?}>, Sched<[WriteALU]> {
// A6.2 & A8.6.8
bits<3> dst;
bits<8> imm;
@@ -335,7 +361,7 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,
- T1Misc<{0,0,0,0,0,?,?}> {
+ T1Misc<{0,0,0,0,0,?,?}>, Sched<[WriteALU]> {
// A6.2.5 & A8.6.8
bits<7> imm;
let Inst{6-0} = imm;
@@ -346,7 +372,7 @@ def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
// FIXME: The encoding and the ASM string don't match up.
def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "sub", "\t$Rdn, $imm", []>,
- T1Misc<{0,0,0,0,1,?,?}> {
+ T1Misc<{0,0,0,0,1,?,?}>, Sched<[WriteALU]> {
// A6.2.5 & A8.6.214
bits<7> imm;
let Inst{6-0} = imm;
@@ -367,7 +393,7 @@ def : tInstAlias<"sub${p} sp, sp, $imm",
// ADD <Rm>, sp
def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
"add", "\t$Rdn, $sp, $Rn", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.9 Encoding T1
bits<4> Rdn;
let Inst{7} = Rdn{3};
@@ -379,7 +405,7 @@ def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr,
// ADD sp, <Rm>
def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.9 Encoding T2
bits<4> Rm;
let Inst{7} = 1;
@@ -395,7 +421,7 @@ def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr,
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
- T1Special<{1,1,0,?}> {
+ T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
// A6.2.3 & A8.6.25
bits<4> Rm;
let Inst{6-3} = Rm;
@@ -406,12 +432,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
- [(ARMretflag)], (tBX LR, pred:$p)>;
+ [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
2, IIC_Br, [],
- (tBX GPR:$Rm, pred:$p)>;
+ (tBX GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
}
// All calls clobber the non-callee saved registers. SP is marked as a use to
@@ -424,7 +450,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb]> {
+ Requires<[IsThumb]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -438,7 +464,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T]> {
+ Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -453,7 +479,7 @@ let isCall = 1,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T]>,
- T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
+ T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b000;
@@ -463,29 +489,32 @@ let isCall = 1,
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only]>;
+ Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br,
"b", "\t$target", [(br bb:$target)]>,
- T1Encoding<{1,1,1,0,0,?}> {
+ T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> {
bits<11> target;
let Inst{10-0} = target;
- }
+ let AsmMatchConverter = "cvtThumbBranches";
+ }
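+// cvtThumbBranches lets the asm matcher switch between the narrow 16-bit
+// and wide 32-bit branch forms based on the target offset and whether the
+// branch sits inside an IT block.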
// Far jump
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
// the clobber of LR.
let Defs = [LR] in
def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
- 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>;
+ 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>,
+ Sched<[WriteBrTbl]>;
def tBR_JTr : tPseudoInst<(outs),
(ins tGPR:$target, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBrTbl]> {
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
@@ -496,13 +525,15 @@ let isBranch = 1, isTerminator = 1 in
def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
"b${p}\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>,
- T1BranchCond<{1,1,0,1}> {
+ T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> {
bits<4> p;
bits<8> target;
let Inst{11-8} = p;
let Inst{7-0} = target;
+ let AsmMatchConverter = "cvtThumbBranches";
}
+
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
@@ -510,7 +541,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb]>;
+ Requires<[IsThumb]>, Sched<[WriteBr]>;
}
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
// on IOS), so it's in ARMInstrThumb2.td.
@@ -520,7 +551,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins t_brtarget:$dst, pred:$p),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
- Requires<[IsThumb, IsNotIOS]>;
+ Requires<[IsThumb, IsNotIOS]>, Sched<[WriteBr]>;
}
}
@@ -530,7 +561,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// If Inst{11-8} == 0b1111 then SEE SVC
let isCall = 1, Uses = [SP] in
def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
- "svc", "\t$imm", []>, Encoding16 {
+ "svc", "\t$imm", []>, Encoding16, Sched<[WriteBr]> {
bits<8> imm;
let Inst{15-12} = 0b1101;
let Inst{11-8} = 0b1111;
@@ -540,7 +571,7 @@ def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br,
// The assembler uses 0xDEFE for a trap instruction.
let isBarrier = 1, isTerminator = 1 in
def tTRAP : TI<(outs), (ins), IIC_Br,
- "trap", [(trap)]>, Encoding16 {
+ "trap", [(trap)]>, Encoding16, Sched<[WriteBr]> {
let Inst = 0xdefe;
}
@@ -627,11 +658,9 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
-// Load tconstpool
-// FIXME: Use ldr.n to work around a darwin assembler bug.
-let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", ".n\t$Rt, $addr",
+ "ldr", "\t$Rt, $addr",
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
T1Encoding<{0,1,0,0,1,?}> {
// A6.2 & A8.6.59
@@ -641,18 +670,6 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
-// FIXME: Remove this entry when the above ldr.n workaround is fixed.
-// For assembly/disassembly use only.
-def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr", []>,
- T1Encoding<{0,1,0,0,1,?}> {
- // A6.2 & A8.6.59
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
// A8.6.194 & A8.6.192
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
t_addrmode_is4, AddrModeT1_4,
@@ -833,14 +850,15 @@ let isCommutable = 1, Uses = [CPSR] in
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"add", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
+ Sched<[WriteALU]> {
bits<3> imm3;
let Inst{8-6} = imm3;
}
@@ -849,7 +867,8 @@ def tADDi8 : // A8.6.4 T2
T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
(ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"add", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ Sched<[WriteALU]>;
// Add register
let isCommutable = 1 in
@@ -857,12 +876,12 @@ def tADDrr : // A8.6.6 T1
T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"add", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
let neverHasSideEffects = 1 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}> {
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.6 T2
bits<4> Rdn;
bits<4> Rm;
@@ -877,14 +896,15 @@ def tAND : // A8.6.12
T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"and", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// ASR immediate
def tASRri : // A8.6.14
T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"asr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -894,14 +914,15 @@ def tASRrr : // A8.6.15
T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"asr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// BIC register
def tBIC : // A8.6.20
T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"bic", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>,
+ Sched<[WriteALU]>;
// CMN register
let isCompare = 1, Defs = [CPSR] in {
@@ -917,7 +938,7 @@ def tCMNz : // A8.6.33
T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmn", "\t$Rn, $Rm",
- [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>;
} // isCompare = 1, Defs = [CPSR]
@@ -926,7 +947,7 @@ let isCompare = 1, Defs = [CPSR] in {
def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi,
"cmp", "\t$Rn, $imm8",
[(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
- T1General<{1,0,1,?,?}> {
+ T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> {
// A8.6.35
bits<3> Rn;
bits<8> imm8;
@@ -939,11 +960,11 @@ def tCMPr : // A8.6.36 T1
T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iCMPr,
"cmp", "\t$Rn, $Rm",
- [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>;
def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
"cmp", "\t$Rn, $Rm", []>,
- T1Special<{0,1,?,?}> {
+ T1Special<{0,1,?,?}>, Sched<[WriteCMP]> {
// A8.6.36 T2
bits<4> Rm;
bits<4> Rn;
@@ -960,14 +981,15 @@ def tEOR : // A8.6.45
T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"eor", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// LSL immediate
def tLSLri : // A8.6.88
T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5),
IIC_iMOVsi,
"lsl", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -977,14 +999,15 @@ def tLSLrr : // A8.6.89
T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"lsl", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// LSR immediate
def tLSRri : // A8.6.90
T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5),
IIC_iMOVsi,
"lsr", "\t$Rd, $Rm, $imm5",
- [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> {
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]>,
+ Sched<[WriteALU]> {
bits<5> imm5;
let Inst{10-6} = imm5;
}
@@ -994,14 +1017,14 @@ def tLSRrr : // A8.6.91
T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"lsr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Move register
let isMoveImm = 1 in
def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
"mov", "\t$Rd, $imm8",
[(set tGPR:$Rd, imm0_255:$imm8)]>,
- T1General<{1,0,0,?,?}> {
+ T1General<{1,0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.96
bits<3> Rd;
bits<8> imm8;
@@ -1019,7 +1042,7 @@ let neverHasSideEffects = 1 in {
def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
2, IIC_iMOVr,
"mov", "\t$Rd, $Rm", "", []>,
- T1Special<{1,0,?,?}> {
+ T1Special<{1,0,?,?}>, Sched<[WriteALU]> {
// A8.6.97
bits<4> Rd;
bits<4> Rm;
@@ -1029,7 +1052,7 @@ def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
}
let Defs = [CPSR] in
def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
- "movs\t$Rd, $Rm", []>, Encoding16 {
+ "movs\t$Rd, $Rm", []>, Encoding16, Sched<[WriteALU]> {
// A8.6.97
bits<3> Rd;
bits<3> Rm;
@@ -1060,7 +1083,7 @@ def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn,
def tMVN : // A8.6.107
T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
"mvn", "\t$Rd, $Rn",
- [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>, Sched<[WriteALU]>;
// Bitwise or register
let isCommutable = 1 in
@@ -1068,7 +1091,7 @@ def tORR : // A8.6.114
T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iBITr,
"orr", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
// Swaps
def tREV : // A8.6.134
@@ -1076,35 +1099,36 @@ def tREV : // A8.6.134
IIC_iUNAr,
"rev", "\t$Rd, $Rm",
[(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
def tREV16 : // A8.6.135
T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"rev16", "\t$Rd, $Rm",
[(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
def tREVSH : // A8.6.136
T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"revsh", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
// Rotate right register
def tROR : // A8.6.139
T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iMOVsr,
"ror", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Negate register
def tRSB : // A8.6.141
T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
IIC_iALUi,
"rsb", "\t$Rd, $Rn, #0",
- [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>, Sched<[WriteALU]>;
// Subtract with carry register
let Uses = [CPSR] in
@@ -1112,14 +1136,16 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Subtract immediate
def tSUBi3 : // A8.6.210 T1
T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
IIC_iALUi,
"sub", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]>,
+ Sched<[WriteALU]> {
bits<3> imm3;
let Inst{8-6} = imm3;
}
@@ -1128,14 +1154,16 @@ def tSUBi8 : // A8.6.210 T2
T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn),
(ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
"sub", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
+ Sched<[WriteALU]>;
// Subtract register
def tSUBrr : // A8.6.212
T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sub", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
// Sign-extend byte
def tSXTB : // A8.6.222
@@ -1143,7 +1171,8 @@ def tSXTB : // A8.6.222
IIC_iUNAr,
"sxtb", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Sign-extend short
def tSXTH : // A8.6.224
@@ -1151,14 +1180,16 @@ def tSXTH : // A8.6.224
IIC_iUNAr,
"sxth", "\t$Rd, $Rm",
[(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Test
let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
def tTST : // A8.6.230
T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
"tst", "\t$Rn, $Rm",
- [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
+ Sched<[WriteALU]>;
// Zero-extend byte
def tUXTB : // A8.6.262
@@ -1166,7 +1197,8 @@ def tUXTB : // A8.6.262
IIC_iUNAr,
"uxtb", "\t$Rd, $Rm",
[(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>,
+ Sched<[WriteALU]>;
// Zero-extend short
def tUXTH : // A8.6.264
@@ -1174,22 +1206,22 @@ def tUXTH : // A8.6.264
IIC_iUNAr,
"uxth", "\t$Rd, $Rm",
[(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
+ Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
- PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary,
- [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+ NoItinerary,
+ [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
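// Editorial sketch (not part of the patch): with cmovpred carrying the
// condition, the custom inserter can expand this pseudo into a diamond of
// roughly this shape:
//   b<cc>  .LpickTrue      ; condition taken from cmovpred:$p
//   mov    $dst, $false
//   b      .Ldone
// .LpickTrue:
//   mov    $dst, $true
// .Ldone: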
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
- T1Encoding<{1,0,1,0,0,?}> {
+ T1Encoding<{1,0,1,0,0,?}>, Sched<[WriteALU]> {
bits<3> Rd;
bits<8> addr;
let Inst{10-8} = Rd;
@@ -1199,12 +1231,12 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
- 2, IIC_iALUi, []>;
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
let hasSideEffects = 1 in
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- 2, IIC_iALUi, []>;
+ 2, IIC_iALUi, []>, Sched<[WriteALU]>;
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -1215,7 +1247,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// complete with fixup for the aeabi_read_tp function.
let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
- [(set R0, ARMthread_pointer)]>;
+ [(set R0, ARMthread_pointer)]>,
+ Sched<[WriteBr]>;
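// Editorial note: tTPsoft expands to a call to the EABI helper
// __aeabi_read_tp, which returns the thread pointer in r0 (hence the
// R0/R12/LR/CPSR defs and the WriteBr scheduling class above).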
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
@@ -1381,13 +1414,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
2, IIC_iPop_Br, [],
- (tPOP pred:$p, reglist:$regs)>;
+ (tPOP pred:$p, reglist:$regs)>, Sched<[WriteBrL]>;
// Indirect branch using "mov pc, $Rm"
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
2, IIC_Br, [(brind GPR:$Rm)],
- (tMOVr PC, GPR:$Rm, pred:$p)>;
+ (tMOVr PC, GPR:$Rm, pred:$p)>, Sched<[WriteBr]>;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 4dacb86df4ce..48acffd3a64e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -173,14 +173,13 @@ def t2ldr_pcrel_imm12 : Operand<i32> {
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
- let PrintMethod = "printAdrLabelOperand";
+ let PrintMethod = "printAdrLabelOperand<0>";
}
-
// t2addrmode_posimm8 := reg + imm8
def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
def t2addrmode_posimm8 : Operand<i32> {
- let PrintMethod = "printT2AddrModeImm8Operand";
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemPosImm8OffsetAsmOperand;
@@ -191,7 +190,7 @@ def t2addrmode_posimm8 : Operand<i32> {
def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
def t2addrmode_negimm8 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
- let PrintMethod = "printT2AddrModeImm8Operand";
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemNegImm8OffsetAsmOperand;
@@ -200,15 +199,22 @@ def t2addrmode_negimm8 : Operand<i32>,
// t2addrmode_imm8 := reg +/- imm8
def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
-def t2addrmode_imm8 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
- let PrintMethod = "printT2AddrModeImm8Operand";
+class T2AddrMode_Imm8 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
+def t2addrmode_imm8 : T2AddrMode_Imm8 {
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+}
+
+def t2addrmode_imm8_pre : T2AddrMode_Imm8 {
+ let PrintMethod = "printT2AddrModeImm8Operand<true>";
+}
+
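// Editorial note: splitting a _pre operand out of t2addrmode_imm8 lets the
// pre-indexed (writeback) loads and stores carry the distinction in the
// operand class itself, via the <false>/<true> PrintMethod instantiations;
// this is what allows the per-instruction cvt*WriteBackRegT2AddrModeImm8
// AsmMatchConverter hooks to be dropped further down.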
def t2am_imm8_offset : Operand<i32>,
ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
[], [SDNPWantRoot]> {
@@ -219,14 +225,21 @@ def t2am_imm8_offset : Operand<i32>,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-def t2addrmode_imm8s4 : Operand<i32> {
- let PrintMethod = "printT2AddrModeImm8s4Operand";
+class T2AddrMode_Imm8s4 : Operand<i32> {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
+def t2addrmode_imm8s4 : T2AddrMode_Imm8s4 {
+ let PrintMethod = "printT2AddrModeImm8s4Operand<false>";
+}
+
+def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
+ let PrintMethod = "printT2AddrModeImm8s4Operand<true>";
+}
+
def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
def t2am_imm8s4_offset : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
@@ -238,7 +251,8 @@ def t2am_imm8s4_offset : Operand<i32> {
def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
let Name = "MemImm0_1020s4Offset";
}
-def t2addrmode_imm0_1020s4 : Operand<i32> {
+def t2addrmode_imm0_1020s4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm0_1020s4";
@@ -451,6 +465,18 @@ class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rm;
}
+class T2ThreeRegNoP<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
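// Editorial note: unlike T2ThreeReg, this class derives from T2XI and so has
// no predicate operand; it is used below for the v8 CRC32 instructions,
// which (as an assumption here) are not meant to be predicated.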
class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2sI<oops, iops, itin, opc, asm, pattern> {
@@ -554,7 +580,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -563,7 +590,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// register
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -576,7 +604,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -635,7 +664,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -645,7 +675,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, "\t$Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -657,7 +688,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rs : T2sTwoRegShiftedReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -678,12 +710,14 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
(ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
4, iii,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- t2_so_imm:$imm))]>;
+ t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
// register
def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
4, iir,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- rGPR:$Rm))]> {
+ rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
// shifted register
@@ -691,7 +725,8 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
(ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, iis,
[(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
- t2_so_reg:$ShiftedRm))]>;
+ t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALUsr]>;
}
}
@@ -704,13 +739,15 @@ multiclass T2I_rbin_s_is<PatFrag opnode> {
(ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
4, IIC_iALUi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
- rGPR:$Rn))]>;
+ rGPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
// shifted register
def rs : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
4, IIC_iALUsi,
[(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
- rGPR:$Rn))]>;
+ rGPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
}
}
@@ -725,7 +762,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def ri : T2sTwoRegImm<
(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi,
opc, ".w\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -737,7 +775,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def ri12 : T2I<
(outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
!strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
- [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -755,7 +794,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
// register
def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm),
IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -769,7 +809,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
def rs : T2sTwoRegShiftedReg<
(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> {
+ [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -787,7 +828,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -797,7 +838,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
opc, ".w\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -811,7 +852,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
[(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -826,7 +867,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
opc, ".w\t$Rd, $Rm, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
@@ -836,7 +878,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
def rr : T2sThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -880,7 +923,7 @@ let isCompare = 1, Defs = [CPSR] in {
def ri : T2OneRegCmpImm<
(outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -892,7 +935,7 @@ let isCompare = 1, Defs = [CPSR] in {
def rr : T2TwoRegCmp<
(outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
opc, ".w\t$Rn, $Rm",
- [(opnode GPRnopc:$Rn, rGPR:$Rm)]> {
+ [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -906,7 +949,8 @@ let isCompare = 1, Defs = [CPSR] in {
def rs : T2OneRegCmpShiftedReg<
(outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -941,6 +985,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{19-16} = addr{16-13}; // Rn
let Inst{15-12} = Rt;
let Inst{11-0} = addr{11-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadImm12";
}
def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
@@ -961,6 +1007,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{9} = addr{8}; // U
let Inst{8} = 0; // The W bit.
let Inst{7-0} = addr{7-0}; // imm
+
+ let DecoderMethod = "DecodeT2LoadImm8";
}
def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
@@ -993,14 +1041,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
- let Inst{23} = ?; // add = (U == '1')
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
let Inst{19-16} = 0b1111; // Rn
+
bits<4> Rt;
- bits<12> addr;
let Inst{15-12} = Rt{3-0};
+
+ bits<13> addr;
+ let Inst{23} = addr{12}; // add = (U == '1')
let Inst{11-0} = addr{11-0};
+
+ let DecoderMethod = "DecodeT2LoadLabel";
}
}
@@ -1167,7 +1219,8 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
// assembler.
def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
(ins t2adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> {
+ IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []>,
+ Sched<[WriteALU, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -1190,12 +1243,12 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
- 4, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi,
- []>;
+ []>, Sched<[WriteALU, ReadALU]>;
//===----------------------------------------------------------------------===//
@@ -1209,15 +1262,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- rGPR, UnOpFrag<(zextloadi16 node:$Src)>>;
+ GPR, UnOpFrag<(zextloadi16 node:$Src)>>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- rGPR, UnOpFrag<(zextloadi8 node:$Src)>>;
+ GPR, UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- rGPR, UnOpFrag<(sextloadi16 node:$Src)>>;
+ GPR, UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- rGPR, UnOpFrag<(sextloadi8 node:$Src)>>;
+ GPR, UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
@@ -1275,12 +1328,9 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins t2addrmode_imm8:$addr),
+ (ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []> {
- let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
-}
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
@@ -1288,48 +1338,42 @@ def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
"ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins t2addrmode_imm8:$addr),
+ (ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []> {
- let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
-}
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins t2addrmode_imm8:$addr),
+ (ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []> {
- let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
-}
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+
def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins t2addrmode_imm8:$addr),
+ (ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []> {
- let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
-}
+ []>;
+
def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins t2addrmode_imm8:$addr),
+ (ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []> {
- let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8";
-}
+ []>;
+
def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
@@ -1354,6 +1398,8 @@ class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
let Inst{11} = 1;
let Inst{10-8} = 0b110; // PUW.
let Inst{7-0} = addr{7-0};
+
+ let DecoderMethod = "DecodeT2LoadT";
}
def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
@@ -1362,6 +1408,32 @@ def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary,
+ opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-24} = 0b000;
+ let Inst{23-20} = bits23_20;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+
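// Editorial note: lda/ldab/ldah are the ARMv8 load-acquire forms; later
// memory accesses cannot be reordered ahead of them, so e.g.
//   lda r0, [r1]
// replaces the pre-v8 "ldr r0, [r1]; dmb ish" acquire idiom.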
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
@@ -1380,27 +1452,22 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
- let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
-}
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+
def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"strh", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
- let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
-}
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
- let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
-}
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1487,9 +1554,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// For disassembly only.
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
- (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru,
+ (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
- let AsmMatchConverter = "cvtT2LdrdPre";
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
@@ -1499,10 +1565,9 @@ def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
"$addr.base = $wb", []>;
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
- (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $wb", []> {
- let AsmMatchConverter = "cvtT2StrdPre";
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
@@ -1512,6 +1577,31 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+class T2Istrrel<bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
+ asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2STL : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stl", "\t$Rt, $addr", []>;
+def t2STLB : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlb", "\t$Rt, $addr", []>;
+def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlh", "\t$Rt, $addr", []>;
+
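// Editorial note: stl/stlb/stlh are the matching ARMv8 store-release forms;
// earlier accesses complete before them, so e.g.
//   stl r0, [r1]
// replaces the pre-v8 "dmb ish; str r0, [r1]" release idiom.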
// T2Ipl (Preload Data/Instruction) notifies the memory system of a possible
// future data/instruction access.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
@@ -1520,24 +1610,27 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
+ let Inst{23} = 1;
let Inst{22} = 0;
let Inst{21} = write;
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
bits<17> addr;
- let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
- let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm12
+
+ let DecoderMethod = "DecodeT2LoadImm12";
}
def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // U = 0
@@ -1550,11 +1643,14 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
bits<13> addr;
let Inst{19-16} = addr{12-9}; // Rn
let Inst{7-0} = addr{7-0}; // imm8
+
+ let DecoderMethod = "DecodeT2LoadImm8";
}
def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
"\t$addr",
- [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]>,
+ Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // add = TRUE for T1
@@ -1562,7 +1658,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{21} = write;
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
- let Inst{11-6} = 0000000;
+ let Inst{11-6} = 0b000000;
bits<10> addr;
let Inst{19-16} = addr{9-6}; // Rn
@@ -1571,15 +1667,33 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadShift";
}
- // FIXME: We should have a separate 'pci' variant here. As-is we represent
- // it via the i12 variant, which it's related to, but that means we can
- // represent negative immediates, which aren't legal for anything except
- // the 'pci' case (Rn == 15).
}
-defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
-defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
-defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+
+// The pci variant is very similar to i12, but it supports negative offsets
+// from the PC. Only PLD and PLI have pci variants (not PLDW); an example
+// follows the defs below.
+class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
+ IIC_Preload, opc, "\t$addr",
+ [(ARMPreload (ARMWrapper tconstpool:$addr),
+ (i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = inst;
+ let Inst{22-20} = 0b001;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1111;
+
+ bits<13> addr;
+ let Inst{23} = addr{12}; // add = (U == '1')
+ let Inst{11-0} = addr{11-0}; // imm12
+
+ let DecoderMethod = "DecodeT2LoadLabel";
+}
+
+def t2PLDpci : T2Iplpci<0, "pld">, Requires<[IsThumb2]>;
+def t2PLIpci : T2Iplpci<1, "pli">, Requires<[IsThumb2,HasV7]>;
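// Editorial example: a literal preload such as "pld [pc, #-16]" (Rn == 15,
// U == 0) now selects t2PLDpci; the i12 variant above can no longer encode
// it, since its U bit is now hard-wired to 1.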
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
@@ -1743,7 +1857,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
let neverHasSideEffects = 1 in
def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
- "mov", ".w\t$Rd, $Rm", []> {
+ "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1763,7 +1877,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
AddedComplexity = 1 in
def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
"mov", ".w\t$Rd, $imm",
- [(set rGPR:$Rd, t2_so_imm:$imm)]> {
+ [(set rGPR:$Rd, t2_so_imm:$imm)]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1786,7 +1900,7 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
- [(set rGPR:$Rd, imm0_65535:$imm)]> {
+ [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1804,6 +1918,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
+
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
@@ -1812,7 +1929,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
(ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi,
"movt", "\t$Rd, $imm",
[(set rGPR:$Rd,
- (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1831,7 +1949,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
}
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
} // Constraints
def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
@@ -2171,7 +2290,7 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"rrx", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2185,7 +2304,8 @@ let isCodeGenOnly = 1, Defs = [CPSR] in {
def t2MOVsrl_flag : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2199,7 +2319,8 @@ def t2MOVsrl_flag : T2TwoRegShiftImm<
def t2MOVsra_flag : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2320,7 +2441,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// shifted imm
def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
opc, "\t$Rd, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]>, Sched<[WriteALU]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let isMoveImm = MoveImm;
@@ -2333,7 +2454,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// register
def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
opc, ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -2345,7 +2466,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
// shifted register
def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rd, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]>,
+ Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -2804,22 +2926,27 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
}
def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
- "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rbit", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
- "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>,
+ Sched<[WriteALU]>;
def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rev16", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
+ Sched<[WriteALU]>;
def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
(and (srl rGPR:$Rm, (i32 8)), 0xFF)),
@@ -2831,7 +2958,8 @@ def t2PKHBT : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
(and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2859,7 +2987,8 @@ def t2PKHTB : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
(and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasT2ExtractPack, IsThumb2]>,
+ Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2873,7 +3002,12 @@ def t2PKHTB : T2ThreeReg<
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; it is PKHBT instead.
-def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+// We also cannot replace an srl by 17..31 with the arithmetic shift that
+// "pkhtb src1, src2, asr (17..31)" would perform, since srl and asr can
+// disagree on the low halfword there (see the worked example below).
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
@@ -2882,6 +3016,34 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
Requires<[HasT2ExtractPack, IsThumb2]>;
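// Editorial worked example for the srl/asr split above: take
// src2 = 0x80000000 and sh = 17. Then (srl src2, 17) = 0x00004000, but
// "asr #17" gives 0xFFFFC000, so the low halfword PKHTB would insert
// (0xC000) differs from the srl result (0x4000). Only sh == 16 makes the
// two shifts agree on the low 16 bits, hence the imm16-only srl pattern.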
//===----------------------------------------------------------------------===//
+// CRC32 Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class T2I_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary,
+ !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasV8, HasCRC]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b010110;
+ let Inst{20} = C;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = sz;
+}
+
+def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>;
+def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>;
+def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>;
+def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
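// Editorial example, assuming the two-i32 int_arm_crc32b signature: IR such
// as
//   %r = call i32 @llvm.arm.crc32b(i32 %acc, i32 %byte)
// selects "crc32b $Rd, $Rn, $Rm", accumulating over the 0x04C11DB7
// polynomial; the crc32c* forms use 0x1EDC6F41 instead.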
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
@@ -2900,7 +3062,8 @@ let isCompare = 1, Defs = [CPSR] in {
def t2CMNri : T2OneRegCmpImm<
(outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi,
"cmn", ".w\t$Rn, $imm",
- [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> {
+ [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>,
+ Sched<[WriteCMP, ReadALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b1000;
@@ -2913,7 +3076,7 @@ let isCompare = 1, Defs = [CPSR] in {
(outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr,
"cmn", ".w\t$Rn, $Rm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, rGPR:$Rm)]> {
+ GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
@@ -2928,7 +3091,8 @@ let isCompare = 1, Defs = [CPSR] in {
(outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi,
"cmn", ".w\t$Rn, $ShiftedRm",
[(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
- GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> {
+ GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ Sched<[WriteCMPsi, ReadALU, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
@@ -2959,92 +3123,67 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+def t2MOVCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
-def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
- IIC_iCMOVi,
- "movw", "\t$Rd, $imm", []>,
- RegConstraint<"$false = $Rd"> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{15} = 0;
-
- bits<4> Rd;
- bits<16> imm;
-
- let Inst{11-8} = Rd;
- let Inst{19-16} = imm{15-12};
- let Inst{26} = imm{11};
- let Inst{14-12} = imm{10-8};
- let Inst{7-0} = imm{7-0};
-}
+def t2MOVCCi16
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
- (ins rGPR:$false, i32imm:$src, pred:$p),
- IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+def t2MVNCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd,
+ (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVsi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+ (opnode rGPR:$Rm, (i32 ty:$imm)),
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
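// Editorial example: a select of a shifted value such as
//   t = cond ? (x << 3) : y
// now matches t2MOVCClsl directly, with cmovpred carrying the condition,
// instead of needing a separate shift before the conditional move.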
let isMoveImm = 1 in
-def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", "\t$Rd, $imm",
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
- imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd"> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b0011;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
-}
-
-class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{5-4} = opcod; // Shift type.
-}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
+def t2MOVCCi32imm
+ : t2PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
} // neverHasSideEffects
@@ -3055,40 +3194,38 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
// Memory barriers protect the atomic sequences.
let hasSideEffects = 1 in {
-def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
- Requires<[IsThumb, HasDB]> {
+def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
+ Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f5;
let Inst{3-0} = opt;
}
}
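// Editorial example: with the intrinsic pattern above, a call such as
//   call void @llvm.arm.dmb(i32 11)   ; 11 == ISH
// selects "dmb ish" directly instead of going through ARMMemBarrier.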
-def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
- "dsb", "\t$opt", []>,
- Requires<[IsThumb, HasDB]> {
+def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
+ Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
}
-def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
- "isb", "\t$opt",
- []>, Requires<[IsThumb, HasDB]> {
+def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
+ "isb", "\t$opt", []>, Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = opt;
}
-class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
let Inst{3-0} = 0b1111;
bits<4> addr;
@@ -3096,15 +3233,14 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
-class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_strex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001100;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
bits<4> Rd;
bits<4> addr;
@@ -3115,15 +3251,18 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
- "ldrexb", "\t$Rt, $addr", "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ "ldrexb", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
+def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
- "ldrexh", "\t$Rt, $addr", "", []>;
+ "ldrexh", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
- "ldrex", "\t$Rt, $addr", "", []> {
+ "ldrex", "\t$Rt, $addr", "",
+ [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> {
bits<4> Rt;
bits<12> addr;
let Inst{31-27} = 0b11101;
@@ -3134,7 +3273,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
let Inst{7-0} = addr{7-0};
}
let hasExtraDefRegAllocReq = 1 in
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
@@ -3142,22 +3281,60 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexb", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexh", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaex", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b11101111;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+
+ let Inst{7} = 1;
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
- "strexb", "\t$Rd, $Rt, $addr", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ "strexb", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd, (strex_1 rGPR:$Rt,
+ addr_offset_none:$addr))]>;
+def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
- "strexh", "\t$Rd, $Rt, $addr", "", []>;
+ "strexh", "\t$Rd, $Rt, $addr", "",
+ [(set rGPR:$Rd, (strex_2 rGPR:$Rt,
+ addr_offset_none:$addr))]>;
+
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
- []> {
+ [(set rGPR:$Rd, (strex_4 rGPR:$Rt,
+ t2addrmode_imm0_1020s4:$addr))]> {
bits<4> Rd;
bits<4> Rt;
bits<12> addr;
@@ -3169,7 +3346,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{7-0} = addr{7-0};
}
let hasExtraSrcRegAllocReq = 1 in
-def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
@@ -3177,9 +3354,45 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexb", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexh", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlex", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b11111110;
+ let Inst{3-0} = Rd;
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
}
-def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
Requires<[IsThumb2, HasV7]> {
let Inst{31-16} = 0xf3bf;
let Inst{15-14} = 0b10;
@@ -3190,6 +3403,15 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
let Inst{3-0} = 0b1111;
}
+def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff),
+ (t2LDREXB addr_offset_none:$addr)>;
+def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff),
+ (t2LDREXH addr_offset_none:$addr)>;
+def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
+ (t2STREXB GPR:$Rt, addr_offset_none:$addr)>;
+def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
+ (t2STREXH GPR:$Rt, addr_offset_none:$addr)>;
+
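// Editorial note on the patterns above: i8/i16 exclusives already
// zero-extend into the result register, so the (and ..., 0xff/0xffff) masks
// the IR adds around them are redundant and can be folded away. A typical
// LL/SC increment loop then selects to:
//   .Lretry:
//     ldrexb  r0, [r1]
//     adds    r0, r0, #1
//     strexb  r2, r0, [r1]
//     cmp     r2, #0
//     bne     .Lretry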
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
// eh_sjlj_setjmp() is an instruction sequence to store the return
@@ -3243,35 +3465,39 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
"b", ".w\t$target",
- [(br bb:$target)]> {
+ [(br bb:$target)]>, Sched<[WriteBr]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 1;
bits<24> target;
- let Inst{26} = target{19};
- let Inst{11} = target{18};
- let Inst{13} = target{17};
+ let Inst{26} = target{23};
+ let Inst{13} = target{22};
+ let Inst{11} = target{21};
let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
-}
+ let AsmMatchConverter = "cvtThumbBranches";
+}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
0, IIC_Br,
- [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>,
+ Sched<[WriteBr]>;
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>;
+ (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
- "tbb", "\t$addr", []> {
+ "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3284,7 +3510,7 @@ def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
}
def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
- "tbh", "\t$addr", []> {
+ "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> {
bits<4> Rn;
bits<4> Rm;
let Inst{31-20} = 0b111010001101;
@@ -3304,7 +3530,7 @@ def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
let isBranch = 1, isTerminator = 1 in
def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
"b", ".w\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc)*/]> {
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
@@ -3320,6 +3546,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{10-0} = target{11-1};
let DecoderMethod = "DecodeThumb2BCCInstruction";
+ let AsmMatchConverter = "cvtThumbBranches";
}
// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
@@ -3331,14 +3558,15 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins uncondbrtarget:$dst, pred:$p),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
- Requires<[IsThumb2, IsIOS]>;
+ Requires<[IsThumb2, IsIOS]>, Sched<[WriteBr]>;
}
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, 2, IIC_iALUx,
- "it$mask\t$cc", "", []> {
+ "it$mask\t$cc", "", []>,
+ ComplexDeprecationPredicate<"IT"> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
@@ -3353,7 +3581,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> {
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -3367,7 +3596,7 @@ let isBranch = 1, isTerminator = 1 in {
def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
"cbz\t$Rn, $target", []>,
T1Misc<{0,0,?,1,?,?,?}>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3379,7 +3608,7 @@ let isBranch = 1, isTerminator = 1 in {
def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
"cbnz\t$Rn, $target", []>,
T1Misc<{1,0,?,1,?,?,?}>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3411,27 +3640,34 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
let M = 1 in
def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
- "$imod.w\t$iflags, $mode">;
+ "$imod\t$iflags, $mode">;
let mode = 0, M = 0 in
def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
"$imod.w\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+def : t2InstAlias<"cps$imod.w $iflags, $mode",
+ (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>;
+def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
+
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
- bits<3> imm;
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> {
+ bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
- let Inst{2-0} = imm;
+ let Inst{7-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
+def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
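// Illustrative sketch, not part of the patch: the alias table above maps the
// architectural hint space one-to-one, so these GNU-style inline-asm
// statements assemble to the same encodings as the equivalent "hint #N".
void emit_hints() {
  asm volatile("nop");    // hint #0
  asm volatile("yield");  // hint #1
  asm volatile("wfe");    // hint #2
  asm volatile("sev");    // hint #4
  asm volatile("sevl");   // hint #5; accepted only when targeting ARMv8
}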
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
@@ -3454,6 +3690,20 @@ def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{19-16} = opt;
}
+class T2DCPS<bits<2> opt, string opc>
+ : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111000;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-2} = 0b0000000000;
+ let Inst{1-0} = opt;
+}
+
+def t2DCPS1 : T2DCPS<0b01, "dcps1">;
+def t2DCPS2 : T2DCPS<0b10, "dcps2">;
+def t2DCPS3 : T2DCPS<0b11, "dcps3">;
+
class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -3508,6 +3758,19 @@ def t2RFEIA : T2RFE<0b111010011001,
(outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
[/* For disassembly only; pattern left blank */]>;
+// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction.
+// The exception return instruction is "subs pc, lr, #imm".
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
+def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
+ "subs", "\tpc, lr, $imm",
+ [(ARMintretflag imm0_255:$imm)]>,
+ Requires<[IsThumb2]> {
+ let Inst{31-8} = 0b111100111101111010001111;
+
+ bits<8> imm;
+ let Inst{7-0} = imm;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -3591,7 +3854,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
let DecoderMethod = "DecodeCopMemInstruction";
}
def _PRE : T2CI<op31_28,
- (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!"> {
bits<13> addr;
bits<4> cop;
@@ -3651,10 +3914,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
@@ -3666,7 +3929,7 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
//
// A/R class can only move from CPSR or SPSR.
def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
@@ -3676,7 +3939,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
@@ -3709,7 +3972,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
// the mask with the fields to be accessed in the special register.
def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
NoItinerary, "msr", "\t$mask, $Rn", []>,
- Requires<[IsThumb2,IsARClass]> {
+ Requires<[IsThumb2,IsNotMClass]> {
bits<5> mask;
bits<4> Rn;
let Inst{31-21} = 0b11110011100;
@@ -3742,8 +4005,7 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
- : T2Cop<Op, oops, iops,
- !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
pattern> {
let Inst{27-24} = 0b1110;
let Inst{20} = direction;
@@ -3768,7 +4030,7 @@ class t2MovRRCopro<bits<4> Op, string opc, bit direction,
list<dag> pattern = []>
: T2Cop<Op, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
- !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
let Inst{27-24} = 0b1100;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -3792,33 +4054,38 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
-def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
+def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, 0)>;
+ c_imm:$CRm, 0, pred:$p)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
-def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ imm:$CRm, imm:$opc2)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, 0)>;
+ c_imm:$CRm, 0, pred:$p)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
- (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
-def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
- (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
- c_imm:$CRm, 0)>;
+def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
- (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
- c_imm:$CRm, imm0_7:$opc2), []>;
-def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
- (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
- c_imm:$CRm, 0)>;
+ (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2), []> {
+ let Predicates = [IsThumb2, PreV8];
+}
+def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3833,19 +4100,24 @@ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
imm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
- GPR:$Rt2, imm:$CRm)]>;
+ GPR:$Rt2, imm:$CRm)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+
/* from coprocessor to ARM core register */
def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+ let Predicates = [IsThumb2, PreV8];
+}
//===----------------------------------------------------------------------===//
// Other Coprocessor Instructions.
//
-def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
- "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
let Inst{27-24} = 0b1110;
@@ -3864,11 +4136,13 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
- "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
let Inst{27-24} = 0b1110;
@@ -3887,6 +4161,8 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
@@ -3960,6 +4236,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+let AddedComplexity = 8 in {
+ def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+}
+
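// Illustrative sketch, not part of the patch: with the AddedComplexity
// patterns above, C++11 acquire loads and release stores can be selected
// directly as the new LDA*/STL* instructions on ARMv8 Thumb-2, with no
// separate barrier instruction.
#include <atomic>

int load_acquire(const std::atomic<int> &v) {
  return v.load(std::memory_order_acquire);  // e.g. lda r0, [r0]
}

void store_release(std::atomic<int> &v, int x) {
  v.store(x, std::memory_order_release);     // e.g. stl r1, [r0]
}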
//===----------------------------------------------------------------------===//
// Assembler aliases
@@ -3981,7 +4266,8 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
@@ -4056,9 +4342,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>;
-def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>;
-def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>;
+def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4085,7 +4371,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
def : t2InstAlias<"ldr${p} $Rt, $addr",
- (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+ (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrb${p} $Rt, $addr",
(t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p} $Rt, $addr",
@@ -4247,16 +4533,16 @@ def : t2InstAlias<"mvn${p} $Rd, $imm",
(t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
// Same for AND <--> BIC
def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
- (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rdn, $imm",
- (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
@@ -4298,7 +4584,7 @@ def : t2InstAlias<"adr${p} $Rd, $addr",
// LDR(literal) w/ alternate [pc, #imm] syntax.
def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
- (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
@@ -4309,7 +4595,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $addr",
- (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
(t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
@@ -4321,3 +4607,10 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
def : t2InstAlias<"add${p} $Rd, pc, $imm",
(t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
+
+// PLD/PLDW/PLI with alternate literal form.
+def : t2InstAlias<"pld${p} $addr",
+ (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : InstAlias<"pli${p} $addr",
+ (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>,
+ Requires<[IsThumb2,HasV7]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index b5a896c69985..a8cdc5ca0637 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -224,7 +224,36 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
(VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
-// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+// FLDMX, FSTMX - Load and store multiple unknown precision registers for
+// pre-armv6 cores.
+// These instructions are deprecated, so we don't want them to be selected.
+multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+ // Unknown precision
+ def XIA :
+ AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def XIA_UPD :
+ AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def XDB_UPD :
+ AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1;
+ let Inst{20} = L_bit;
+ }
+}
+
+defm FLDM : vfp_ldstx_mult<"fldm", 1>;
+defm FSTM : vfp_ldstx_mult<"fstm", 0>;
//===----------------------------------------------------------------------===//
// FP Binary Operations.
@@ -304,9 +333,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ Uses = [CPSR], AddedComplexity = 4 in {
+ def S : ASbInp<0b11100, opc, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+ Requires<[HasFPARMv8]>;
+
+ def D : ADbInp<0b11100, opc, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
+ }
+}
+
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
+
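// For reference, an abridged excerpt of ARMCC::CondCodes (from
// MCTargetDesc/ARMBaseInfo.h) showing where the integers in the defm lines
// above come from:
enum CondCodes {
  EQ = 0,   // VSELEQ
  VS = 6,   // VSELVS
  GE = 10,  // VSELGE
  GT = 12,  // VSELGT
};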
+multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+ def S : ASbInp<0b11101, 0b00, opc,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
+ [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
+ Requires<[HasFPARMv8]>;
+
+ def D : ADbInp<0b11101, 0b00, opc,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
+ [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
+ }
+}
+
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
+
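// Illustrative sketch, not part of the patch: vmaxnm/vminnm implement IEEE
// 754-2008 maxNum/minNum, where a quiet NaN in one operand yields the other
// operand. C's fmax/fmin carry the same contract, so a compiler may select
// these instructions for them on an FPARMv8 target:
#include <cmath>
float max_ref(float a, float b) { return std::fmax(a, b); } // -> vmaxnm.f32
float min_ref(float a, float b) { return std::fmin(a, b); } // -> vminnm.f32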
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
- (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>,
+ Requires<[NoHonorSignDependentRounding,HasDPVFP]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
@@ -437,9 +509,11 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
let Inst{11-8} = 0b1011;
let Inst{7-6} = 0b11;
let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
-// Between half-precision and single-precision. For disassembly only.
+// Between half-, single- and double-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
@@ -464,6 +538,111 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
+def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+}
+
+def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+}
+
+def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+}
+
+multiclass vcvt_inst<string opc, bits<2> rm> {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
+ []>, Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+
+ def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
+ []>, Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+
+ def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ bits<5> Dm;
+
+ let Inst{17-16} = rm;
+
+ // Encode instruction operands
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{8} = 1;
+ }
+
+ def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ bits<5> Dm;
+
+ let Inst{17-16} = rm;
+
+ // Encode instruction operands
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{8} = 1;
+ }
+ }
+}
+
+defm VCVTA : vcvt_inst<"a", 0b00>;
+defm VCVTN : vcvt_inst<"n", 0b01>;
+defm VCVTP : vcvt_inst<"p", 0b10>;
+defm VCVTM : vcvt_inst<"m", 0b11>;
+
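// Illustrative sketch, not part of the patch: the rm field encodes the
// rounding mode of the new conversions. Scalar C equivalents, assuming the
// default FE_TONEAREST environment for the vcvtn case:
#include <cmath>
int cvta(float x) { return (int)std::round(x);     } // to nearest, ties away from zero
int cvtn(float x) { return (int)std::nearbyint(x); } // to nearest, ties to even
int cvtp(float x) { return (int)std::ceil(x);      } // towards +infinity
int cvtm(float x) { return (int)std::floor(x);     } // towards -infinity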
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
@@ -478,6 +657,63 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
let D = VFPNeonA8Domain;
}
+multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
+ def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
+ []>, Requires<[HasFPARMv8]> {
+ let Inst{7} = op2;
+ let Inst{16} = op;
+ }
+ def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ let Inst{7} = op2;
+ let Inst{16} = op;
+ }
+
+ def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
+}
+
+defm VRINTZ : vrint_inst_zrx<"z", 0, 1>;
+defm VRINTR : vrint_inst_zrx<"r", 0, 0>;
+defm VRINTX : vrint_inst_zrx<"x", 1, 0>;
+
+multiclass vrint_inst_anpm<string opc, bits<2> rm> {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
+ []>, Requires<[HasFPARMv8]> {
+ let Inst{17-16} = rm;
+ }
+ def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ let Inst{17-16} = rm;
+ }
+ }
+
+ def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
+}
+
+defm VRINTA : vrint_inst_anpm<"a", 0b00>;
+defm VRINTN : vrint_inst_anpm<"n", 0b01>;
+defm VRINTP : vrint_inst_anpm<"p", 0b10>;
+defm VRINTM : vrint_inst_anpm<"m", 0b11>;
+
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
@@ -667,6 +903,8 @@ class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -738,6 +976,8 @@ class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Dm{4};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -841,7 +1081,8 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -852,11 +1093,14 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
+ Sched<[WriteCvtFP]> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
let Inst{15-12} = dst{3-0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
@@ -969,7 +1213,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -985,7 +1229,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
@@ -996,7 +1240,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1012,7 +1256,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1023,7 +1267,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1039,7 +1283,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1050,7 +1294,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1065,7 +1309,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1079,7 +1323,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1094,7 +1338,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1103,7 +1347,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma x, y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1114,7 +1358,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1129,7 +1373,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1138,14 +1382,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma (fneg x), y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1156,7 +1400,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1171,7 +1415,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1180,14 +1424,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1198,7 +1442,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1212,7 +1456,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1222,21 +1466,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
// (fma x, y, (fneg z)) -> (vfnms z, x, y))
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1246,15 +1490,17 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
//
let neverHasSideEffects = 1 in {
-def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
- 4, IIC_fpUNA64,
- [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
- RegConstraint<"$Dn = $Dd">;
-
-def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
- 4, IIC_fpUNA32,
- [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
- RegConstraint<"$Sn = $Sd">;
+def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+ IIC_fpUNA64,
+ [(set (f64 DPR:$Dd),
+ (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
+ RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+
+def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+ IIC_fpUNA32,
+ [(set (f32 SPR:$Sd),
+ (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -1300,6 +1546,12 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, mvfr0", []>;
def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, mvfr1", []>;
+ def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>;
+ def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst", []>;
+ def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpinst2", []>;
}
//===----------------------------------------------------------------------===//
@@ -1333,6 +1585,11 @@ let Defs = [FPSCR] in {
// System level GPR -> FPSID
def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
"vmsr", "\tfpsid, $src", []>;
+
+ def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst, $src", []>;
+ def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpinst2, $src", []>;
}
//===----------------------------------------------------------------------===//
@@ -1344,7 +1601,8 @@ let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$Dd, $imm",
- [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>,
+ Requires<[HasVFP3,HasDPVFP]> {
bits<5> Dd;
bits<8> imm;
@@ -1426,23 +1684,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">;
def : VFP2MnemonicAlias<"fmxr", "vmsr">;
// Be friendly and accept the old form of zero-compare
-def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
(VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
- (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
(VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
- (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
// No need for the size suffix on VSQRT. It's implied by the register classes.
def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c8ed5760f935..61596d54b692 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -90,6 +90,10 @@ namespace {
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
+ void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+ const MemOpQueue &MemOps, unsigned DefReg,
+ unsigned RangeBegin, unsigned RangeEnd);
+
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -109,12 +113,12 @@ namespace {
unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
int Opcode, unsigned Size,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return true;
}
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between
+/// RangeBegin and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+ SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+ unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+ std::map<unsigned, MachineOperand *> Uses;
+ unsigned LastLivePos = RangeEnd;
+
+ // First we find all uses of this register with Position between RangeBegin
+ // and RangeEnd; any or all of these could be uses of a definition at
+ // RangeBegin. We also record the latest position a definition at RangeBegin
+ // would be considered live.
+ for (unsigned i = 0; i < MemOps.size(); ++i) {
+ MachineInstr &MI = *MemOps[i].MBBI;
+ unsigned MIPosition = MemOps[i].Position;
+ if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+ continue;
+
+ // If this instruction defines the register, then any later use will be of
+ // that definition rather than ours.
+ if (MI.definesRegister(DefReg))
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+ if (!UseOp)
+ continue;
+
+ // If this instruction kills the register then (assuming liveness is
+ // correct when we start) we don't need to think about anything after here.
+ if (UseOp->isKill())
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ Uses[MIPosition] = UseOp;
+ }
+
+ // Now we traverse the list of all uses, and append the ones that actually use
+ // our definition to the requested list.
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+ E = Uses.end();
+ I != E; ++I) {
+ // List is sorted by position so once we've found one out of range there
+ // will be no more to consider.
+ if (I->first > LastLivePos)
+ break;
+ UsesOfImpDefs.push_back(I->second);
+ }
+}
+
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -371,7 +431,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
const unsigned insertPos = memOps[insertAfter].Position;
@@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
+ SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
unsigned DefReg = MO->getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
ImpDefs.push_back(DefReg);
+
+ // There may be other uses of the definition between this instruction and
+ // the eventual LDM/STM position. These should be marked undef if the
+ // merge takes place.
+ findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+ insertPos);
}
}
@@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
+
+ // In gathering loads together, we may have moved the imp-def of a register
+ // past one of its uses. This is OK, since we know better than the rest of
+ // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+ // affected uses.
+ for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+ E = UsesOfImpDefs.end();
+ I != E; ++I)
+ (*I)->setIsUndef();
+
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
@@ -444,10 +521,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, int Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
@@ -489,7 +566,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1))) {
+ ((Count < Limit) && RegNum == PRegNum+1)) &&
+ // On Swift we don't want vldm/vstm to start with an odd register number,
+ // because unaligned Q-register vldm/vstm need more uops.
+ (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
Offset += Size;
PRegNum = RegNum;
++Count;
@@ -1484,7 +1564,7 @@ namespace {
unsigned &PredReg, ARMCC::CondCodes &Pred,
bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB,
- SmallVector<MachineInstr*, 4> &Ops,
+ SmallVectorImpl<MachineInstr *> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap);
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
@@ -1602,8 +1682,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false;
// Make sure the base address satisfies i64 ld / st alignment requirement.
+ // At the moment, we ignore the memory operand's value.
+ // If we want to use AliasAnalysis, we should check it accordingly.
if (!Op0->hasOneMemOperand() ||
- !(*Op0->memoperands_begin())->getValue() ||
(*Op0->memoperands_begin())->isVolatile())
return false;
@@ -1655,7 +1736,7 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
- SmallVector<MachineInstr*, 4> &Ops,
+ SmallVectorImpl<MachineInstr *> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
bool RetVal = false;
@@ -1857,9 +1938,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!StopHere)
BI->second.push_back(MI);
} else {
- SmallVector<MachineInstr*, 4> MIs;
- MIs.push_back(MI);
- Base2LdsMap[Base] = MIs;
+ Base2LdsMap[Base].push_back(MI);
LdBases.push_back(Base);
}
} else {
@@ -1875,9 +1954,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!StopHere)
BI->second.push_back(MI);
} else {
- SmallVector<MachineInstr*, 4> MIs;
- MIs.push_back(MI);
- Base2StsMap[Base] = MIs;
+ Base2StsMap[Base].push_back(MI);
StBases.push_back(Base);
}
}
@@ -1893,7 +1970,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
// Re-schedule loads.
for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
unsigned Base = LdBases[i];
- SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
if (Lds.size() > 1)
RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
}
@@ -1901,7 +1978,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
// Re-schedule stores.
for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
unsigned Base = StBases[i];
- SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
if (Sts.size() > 1)
RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index b6414832003d..e12c9c61ab14 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -82,7 +82,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index f4248fcfcc75..010edf33cea4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -36,6 +36,13 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// 'isThumb'.
bool hasThumb2;
+ /// StByValParamsPadding - For a parameter that is split between
+ /// GPRs and memory: while recovering the GPR part, when
+ /// StackAlignment == 8 and the size of the GPR part mod 8 != 0,
+ /// we need to insert a gap before the parameter start address so that
+ /// the GPR part can be "attached" to the part that was passed via stack.
+ unsigned StByValParamsPadding;
+
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
unsigned ArgRegsSaveSize;
@@ -77,12 +84,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned GPRCS2Size;
unsigned DPRCSSize;
- /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
- /// which belong to these spill areas.
- BitVector GPRCS1Frames;
- BitVector GPRCS2Frames;
- BitVector DPRCSFrames;
-
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
/// sequence of D-registers starting from d8.
@@ -121,7 +122,6 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -129,11 +129,11 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ StByValParamsPadding(0),
ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -141,7 +141,14 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
- unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
+ void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
+
+ unsigned getArgRegsSaveSize(unsigned Align = 0) const {
+ if (!Align)
+ return ArgRegsSaveSize;
+ return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
+ }
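// Illustrative sketch, not part of the patch: the expression above is the
// standard round-up-to-alignment idiom, valid when Align is a power of two.
unsigned roundUpToAlignment(unsigned Size, unsigned Align) {
  return (Size + Align - 1) & ~(Align - 1); // e.g. (12 + 7) & ~7 == 16
}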
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
bool hasStackFrame() const { return HasStackFrame; }
@@ -175,59 +182,6 @@ public:
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
- bool isGPRCalleeSavedArea1Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS1Frames.size())
- return false;
- return GPRCS1Frames[fi];
- }
- bool isGPRCalleeSavedArea2Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS2Frames.size())
- return false;
- return GPRCS2Frames[fi];
- }
- bool isDPRCalleeSavedAreaFrame(int fi) const {
- if (fi < 0 || fi >= (int)DPRCSFrames.size())
- return false;
- return DPRCSFrames[fi];
- }
-
- void addGPRCalleeSavedArea1Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS1Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS1Frames.resize(Size);
- }
- GPRCS1Frames[fi] = true;
- }
- }
- void addGPRCalleeSavedArea2Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS2Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS2Frames.resize(Size);
- }
- GPRCS2Frames[fi] = true;
- }
- }
- void addDPRCalleeSavedAreaFrame(int fi) {
- if (fi >= 0) {
- int Size = DPRCSFrames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- DPRCSFrames.resize(Size);
- }
- DPRCSFrames[fi] = true;
- }
- }
-
unsigned createJumpTableUId() {
return JumpTableUId++;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index 6f3819afd0a5..a7880364d89a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
index 8a248425c33c..fb1537cf9425 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -19,13 +19,13 @@
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
+
+class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ ARMRegisterInfo(const ARMSubtarget &STI);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index b0f576bc2b6f..d0457618ef6f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -27,31 +27,31 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> {
// Subregister indices.
let Namespace = "ARM" in {
-def qqsub_0 : SubRegIndex;
-def qqsub_1 : SubRegIndex;
+def qqsub_0 : SubRegIndex<256>;
+def qqsub_1 : SubRegIndex<256, 256>;
// Note: Code depends on these having consecutive numbers.
-def qsub_0 : SubRegIndex;
-def qsub_1 : SubRegIndex;
-def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
-def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
-
-def dsub_0 : SubRegIndex;
-def dsub_1 : SubRegIndex;
-def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
-def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
-def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
-def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
-def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
-def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
-
-def ssub_0 : SubRegIndex;
-def ssub_1 : SubRegIndex;
-def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
-def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
-
-def gsub_0 : SubRegIndex;
-def gsub_1 : SubRegIndex;
+def qsub_0 : SubRegIndex<128>;
+def qsub_1 : SubRegIndex<128, 128>;
+def qsub_2 : ComposedSubRegIndex<qqsub_1, qsub_0>;
+def qsub_3 : ComposedSubRegIndex<qqsub_1, qsub_1>;
+
+def dsub_0 : SubRegIndex<64>;
+def dsub_1 : SubRegIndex<64, 64>;
+def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
+def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
+def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
+def dsub_5 : ComposedSubRegIndex<qsub_2, dsub_1>;
+def dsub_6 : ComposedSubRegIndex<qsub_3, dsub_0>;
+def dsub_7 : ComposedSubRegIndex<qsub_3, dsub_1>;
+
+def ssub_0 : SubRegIndex<32>;
+def ssub_1 : SubRegIndex<32, 32>;
+def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>;
+def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>;
+
+def gsub_0 : SubRegIndex<32>;
+def gsub_1 : SubRegIndex<32, 32>;
// Let TableGen synthesize the remaining 12 ssub_* indices.
// We don't need to name them.
}
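
The rewritten indices give every sub-register an explicit bit size and bit
offset (SubRegIndex<size, offset>) and derive the doubly-nested ones with
ComposedSubRegIndex, which adds offsets. A hedged sketch of that arithmetic,
using plain structs in place of TableGen records:

// Illustrative model only: an index is (bit size, bit offset within parent).
struct SubIdx { unsigned Size, Offset; };

constexpr SubIdx qsub_1{128, 128};   // second Q half of a QQ register
constexpr SubIdx dsub_1{ 64,  64};   // second D half of a Q register

// ComposedSubRegIndex<A, B>: the offset into the outer register is the sum.
constexpr SubIdx compose(SubIdx A, SubIdx B) {
  return SubIdx{B.Size, A.Offset + B.Offset};
}

constexpr SubIdx dsub_3 = compose(qsub_1, dsub_1);
static_assert(dsub_3.Offset == 192 && dsub_3.Size == 64,
              "dsub_3 addresses bits [192,256) of a QQ register");
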
@@ -157,21 +157,27 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Current Program Status Register.
// We model fpscr with two registers: FPSCR models the control bits and will be
-// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV
+// models the APSR when it's accessed by some special instructions. In such cases
+// it has the same encoding as PC.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
let Aliases = [FPSCR];
}
def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
-def FPSID : ARMReg<0, "fpsid">;
-def MVFR1 : ARMReg<6, "mvfr1">;
-def MVFR0 : ARMReg<7, "mvfr0">;
-def FPEXC : ARMReg<8, "fpexc">;
+def FPSID : ARMReg<0, "fpsid">;
+def MVFR2 : ARMReg<5, "mvfr2">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
+def FPEXC : ARMReg<8, "fpexc">;
+def FPINST : ARMReg<9, "fpinst">;
+def FPINST2 : ARMReg<10, "fpinst2">;
// Register classes.
//
@@ -207,6 +213,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
}];
}
+// GPRs without the PC but with APSR. Some instructions allow accessing the
+// APSR, while actually encoding PC in the register field. This is usefull
+// for assembly and disassembly only.
+def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> {
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+}
+
// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
// implied SP argument list.
// FIXME: It would be better to not use this at all and refactor the
@@ -236,7 +252,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
// this class and the preceding one(!) This is what we want.
-def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
let AltOrders = [(and tcGPR, tGPR)];
let AltOrderSelect = [{
return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
index 2d088de96e27..528c4ec73781 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -69,6 +69,24 @@ def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;
+// Division.
+def WriteDiv : SchedWrite;
+
+// Loads.
+def WriteLd : SchedWrite;
+def WritePreLd : SchedWrite;
+
+// Branches.
+def WriteBr : SchedWrite;
+def WriteBrL : SchedWrite;
+def WriteBrTbl : SchedWrite;
+
+// Fixed-point conversions.
+def WriteCvtFP : SchedWrite;
+
+// Noop.
+def WriteNoop : SchedWrite;
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
index 9739ed20ce2e..603e775d351d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -1879,17 +1879,18 @@ def CortexA9Itineraries : ProcessorItineraries<
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler and will eventually replace itineraries.
+class A9WriteLMOpsListType<list<WriteSequence> writes> {
+ list <WriteSequence> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
- let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let MicroOpBufferSize = 56; // Based on available renamed registers.
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overridden by OperandCycles if the
// Itineraries are queried instead.
- let ILPWindow = 10; // Don't reschedule small blocks to hide
- // latency. Minimum latency requirements are already
- // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
@@ -1904,7 +1905,7 @@ def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
def A9UnitAGU : ProcResource<1>;
def A9UnitLS : ProcResource<1>;
-def A9UnitFP : ProcResource<1> { let Buffered = 0; }
+def A9UnitFP : ProcResource<1> { let BufferSize = 0; }
def A9UnitB : ProcResource<1>;
//===----------------------------------------------------------------------===//
@@ -2014,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
// Define a predicate to select the LDM based on number of memory addresses.
def A9LMAdr#NumAddr#Pred :
- SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
} // foreach NumAddr
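
The updated predicate buckets an LDM by 64-bit load pairs rather than by raw
register count: (N+1)/2 is a ceiling divide, so three and four registers both
select the two-address variant. A small sketch under that reading
(numLoadPairs is a hypothetical stand-in for the getNumLDMAddresses query):

#include <cassert>

static unsigned numLoadPairs(unsigned NumRegs) {
  return (NumRegs + 1) / 2;   // ceiling divide: an odd register rounds up
}

int main() {
  assert(numLoadPairs(1) == 1 && numLoadPairs(2) == 1);
  assert(numLoadPairs(3) == 2 && numLoadPairs(4) == 2);
  assert(numLoadPairs(16) == 8);
  return 0;
}
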
@@ -2057,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence<
//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.
+def A9WriteLMOpsList : A9WriteLMOpsListType<
+ [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>;
+
// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi,
- A9WriteL8, A9WriteL8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
// For unknown LDMs, define the maximum number of writes, but only
// make the first two consume resources.
SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
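
The SchedVars above slice prefixes out of one shared operand list;
A9WriteLMOpsList.Writes[0-3] is an inclusive TableGen range, so it yields the
first four writes. The same selection expressed as a sketch (all names here
are illustrative only):

#include <vector>

enum Write { L1, L1Hi, L2, L2Hi, L3, L3Hi, L4, L4Hi };

// Take entries [0, Last] of the list, mirroring Writes[0-Last].
static std::vector<Write> prefix(const std::vector<Write> &Writes,
                                 unsigned Last) {
  return {Writes.begin(), Writes.begin() + Last + 1};
}

int main() {
  const std::vector<Write> Ops = {L1, L1Hi, L2, L2Hi, L3, L3Hi, L4, L4Hi};
  return prefix(Ops, 3).size() == 4 ? 0 : 1;   // Writes[0-3]: two addresses
}
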
@@ -2180,49 +2163,39 @@ def A9WriteLMfp#NumAddr#Hi : WriteSequence<
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.
+
+def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
+ [A9WriteLMfp1, A9WriteLMfp2, // 0-1
+ A9WriteLMfp3, A9WriteLMfp4, // 2-3
+ A9WriteLMfp5, A9WriteLMfp6, // 4-5
+ A9WriteLMfp7, A9WriteLMfp8, // 6-7
+ A9WriteLMfp1Hi, // 8-8
+ A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
+
def A9WriteLMfpPostRA : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi,
- A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
// For unknown LDMs, define the maximum number of writes, but only
- // make the first two consume resources.
- SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3Hi, A9WriteLMfp3Hi,
- A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ // make the first two consume resources. We are optimizing for the case
+ // where the operands are DPRs, and this determines the first eight
+ // types. The remaining eight types are filled to cover the case
+ // where the operands are SPRs.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
+ A9WriteLMfp3Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp8Hi,
A9WriteLMfp5Hi, A9WriteLMfp5Hi,
A9WriteLMfp6Hi, A9WriteLMfp6Hi,
A9WriteLMfp7Hi, A9WriteLMfp7Hi,
@@ -2275,10 +2248,10 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
+def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
@@ -2487,10 +2460,59 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
-// FIXME: need to special case AND, ORR, EOR, BIC because they don't read
-// advance. But our instrinfo claims it does.
+def : InstRW< [WriteALU],
+ (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
+ "BICrr")>;
+def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
+def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
+
def : SchedAlias<WriteCMP, A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;
+
+def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
+ "MOVCCsr")>;
+def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
+def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
+ "MOV_ga_dyn")>;
+def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+def : InstRW< [WriteALU], (instregex "SEL")>;
+
+def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
+
+def : InstRW< [A9WriteM],
+ (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
+ "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
+ "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
+ "SMLALTT")>;
+// FIXME: These instructions used to have NoItinerary. Just copied the one from above.
+def : InstRW< [A9WriteM, A9WriteMHi],
+ (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
+ "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
+
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
+def : InstRW<[A9WriteM16, A9WriteM16Hi],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
+
+def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
+def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
+def : InstRW<[A9WriteLb],
+ (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
+ "LDRH", "LDRSH", "LDRSB")>;
+def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
+
+def : WriteRes<WriteDiv, []> { let Latency = 0; }
+
+def : WriteRes<WriteBr, [A9UnitB]>;
+def : WriteRes<WriteBrL, [A9UnitB]>;
+def : WriteRes<WriteBrTbl, [A9UnitB]>;
+def : WriteRes<WritePreLd, []>;
+def : SchedAlias<WriteCvtFP, A9WriteF>;
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
index 7c6df410706e..8d7dbc246095 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -1076,7 +1076,7 @@ def SwiftItineraries : ProcessorItineraries<
// Swift machine model for scheduling and other instruction cost heuristics.
def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
- let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let MicroOpBufferSize = 45; // Based on NEON renamed registers.
let LoadLatency = 3;
let MispredictPenalty = 14; // A branch direction mispredict.
@@ -1096,9 +1096,27 @@ let SchedModel = SwiftModel in {
def SwiftUnitDiv : ProcResource<1>;
// Generic resource requirements.
+ def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
+ def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
+ def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
+ def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
+ def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 4;
+ }
+ def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
+ let Latency = 6;
+ }
+ def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
+ def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
+ def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
+ def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
+ def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
+ def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
+ def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
+ def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
- def SwiftWriteP01ThreeCycleTwoUops :
- SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
+ SwiftUnitP01]> {
let Latency = 3;
let NumMicroOps = 2;
}
@@ -1107,7 +1125,23 @@ let SchedModel = SwiftModel in {
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-
+ // Plain load without writeback.
+ def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 3;
+ }
+ def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ }
+ // A store does not write to a register.
+ def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 0;
+ }
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
+ }
+ def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
+ SwiftWriteP01OneCycle,
+ SwiftWriteP2ThreeCycle]>;
// 4.2.4 Arithmetic and Logical.
// ALU operation register shifted by immediate variant.
def SwiftWriteALUsi : SchedWriteVariant<[
@@ -1137,8 +1171,906 @@ let SchedModel = SwiftModel in {
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+
// 4.2.5 Integer comparison
def : WriteRes<WriteCMP, [SwiftUnitP01]>;
- def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
- def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+ def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
+ def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
+
+ // 4.2.6 Shift, Move
+ // Shift
+ // ASR,LSL,ROR,RRX
+ // MOV(register-shiftedregister) MVN(register-shiftedregister)
+ // Move
+ // MOV,MVN
+ // MOVT
+ // Sign/Zero extension
+ def : InstRW<[SwiftWriteP01OneCycle],
+ (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
+ "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
+ "t2UXTB16")>;
+ // Pseudo instructions.
+ def : InstRW<[SwiftWriteP01OneCycle2x],
+ (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
+ "t2MOVi32imm", "t2MOV_ga_dyn")>;
+ def : InstRW<[SwiftWriteP01OneCycle3x],
+ (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
+ def : InstRW<[SwiftWriteP01OneCycle2x_load],
+ (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+
+ def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+
+ def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
+ ]>;
+
+ // 4.2.7 Select
+ // SEL
+ def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
+
+ // 4.2.8 Bitfield
+ // BFI,BFC, SBFX,UBFX
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
+ "(t|t2)UBFX", "(t|t2)SBFX")>;
+
+ // 4.2.9 Saturating arithmetic
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
+ "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
+ "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
+ "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
+ "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
+
+ // 4.2.10 Parallel Arithmetic
+ // Not flag setting.
+ def : InstRW< [SwiftWriteALUsr],
+ (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
+ "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
+ "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
+ "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
+ // Flag setting.
+ def : InstRW< [SwiftWriteP01TwoCycle],
+ (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
+ "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
+ "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
+ "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
+ "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
+ "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
+
+ // 4.2.11 Sum of Absolute Difference
+ def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
+ def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
+ (instregex "USADA8")>;
+
+ // 4.2.12 Integer Multiply (32-bit result)
+ // Two sources.
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+ "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+ "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
+ "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
+
+ def SwiftWriteP0P01FiveCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ }
+
+ def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
+ SchedVar<NoSchedPred, [ SwiftWriteP0FourCycle ]>
+ ]>;
+
+ def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
+ SchedVar<NoSchedPred, [ReadALU]>
+ ]>;
+
+ // Multiply accumulate, three sources
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+ "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
+ "t2SMMLSR")>;
+
+ // 4.2.13 Integer Multiply (32-bit result, Q flag)
+ def : InstRW< [SwiftWriteP0FourCycle],
+ (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
+ SwiftReadAdvanceFourCyclesPred],
+ (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
+ "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
+ "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
+ def : InstRW< [SwiftPredP0P01FourFiveCycle],
+ (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
+
+ def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+ }
+ def SwiftWrite1Cycle : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite5Cycle : SchedWriteRes<[]> {
+ let Latency = 5;
+ let NumMicroOps = 0;
+ }
+ def SwiftWrite6Cycle : SchedWriteRes<[]> {
+ let Latency = 6;
+ let NumMicroOps = 0;
+ }
+
+ // 4.2.14 Integer Multiply, Long
+ def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
+ (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
+
+ def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 3];
+ }
+
+ // 4.2.15 Integer Multiply Accumulate, Long
+ // 4.2.16 Integer Multiply Accumulate, Dual
+ // 4.2.17 Integer Multiply Accumulate Accumulate, Long
+ // We are being a bit inaccurate here.
+ def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
+ SchedReadAdvance<4>, SchedReadAdvance<3>],
+ (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+ "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
+ "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT",
+ "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
+ "t2UMAAL")>;
+
+ def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 14;
+ let ResourceCycles = [1, 14];
+ }
+ // 4.2.18 Integer Divide
+ def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : InstRW <[SwiftDiv],
+ (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+
+ // 4.2.19 Integer Load Single Element
+ // 4.2.20 Integer Load Sign-Extended
+ def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
+ SwiftUnitP01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ }
+ def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
+ SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ }
+ def SwiftWrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+ }
+ def SwiftWriteLdFour : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+ }
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
+ "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
+ "tLDR(r|i|spi|pci|pciASM)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle],
+ (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
+ def : InstRW<[SwiftWriteP2P01FourCyle],
+ (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
+ "t2LDRpci_pic", "tLDRS(B|H)")>;
+ def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
+ (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
+ "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
+ "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
+ def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
+ (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
+ "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
+
+ // 4.2.21 Integer Dual Load
+ // Not accurate.
+ def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
+ (instregex "t2LDRDi8", "LDRD$")>;
+ def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
+ (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
+
+ // 4.2.22 Integer Load, Multiple
+ // NumReg = 1 .. 16
+ foreach Lat = 3-25 in {
+ def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = Lat;
+ }
+ def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
+ }
+ // Predicate.
+ foreach NumAddr = 1-16 in {
+ def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ }
+ def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
+ def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
+ def SwiftWriteLM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5Cy, SwiftWriteLM6Cy,
+ SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM12Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM16Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
+ // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
+ SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
+ SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
+ SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
+
+ ]> { let Variadic=1; }
+
+ def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
+ (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
+ "(t|sys)LDM(IA|DA|DB|IB)$")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
+ (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
+ "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
+ def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
+ (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+ // 4.2.23 Integer Store, Single Element
+ def : InstRW<[SwiftWriteP2],
+ (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
+ "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
+ (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
+ "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
+ "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
+ "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
+
+ // 4.2.24 Integer Store, Dual
+ def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
+ (instregex "STRD$", "t2STRDi8")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
+ SwiftWriteP01OneCycle],
+ (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
+
+ // 4.2.25 Integer Store, Multiple
+ def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+ }
+ foreach NumAddr = 1-16 in {
+ def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
+ }
+ def SwiftWriteSTM : SchedWriteVariant<[
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
+ SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
+ // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM2]>
+ ]>;
+ def : InstRW<[SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
+ (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
+ "PUSH", "tPUSH")>;
+
+ // 4.2.26 Branch
+ def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
+ def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
+ def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
+
+ // 4.2.27 Not issued
+ def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+ def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
+
+ // 4.2.28 Advanced SIMD, Integer, 2 cycle
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
+ "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
+ "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
+ "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
+ "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
+
+ def : InstRW<[SwiftWriteP1TwoCycle],
+ (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
+
+ // 4.2.29 Advanced SIMD, Integer, 4 cycle
+ // 4.2.30 Advanced SIMD, Integer with Accumulate
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
+ "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
+ "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
+ "VQSUB")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VRECPE", "VRSQRTE")>;
+
+ // 4.2.31 Advanced SIMD, Add and Shift with Narrow
+ def : InstRW<[SwiftWriteP0P1FourCycle],
+ (instregex "VADDHN", "VSUBHN", "VSHRN")>;
+ def : InstRW<[SwiftWriteP0P1SixCycle],
+ (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
+ "VQRSHRN", "VQRSHRUN")>;
+
+ // 4.2.32 Advanced SIMD, Vector Table Lookup
+ foreach Num = 1-4 in {
+ def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
+ }
+ def : InstRW<[SwiftWrite1xP1TwoCycle],
+ (instregex "VTB(L|X)1")>;
+ def : InstRW<[SwiftWrite2xP1TwoCycle],
+ (instregex "VTB(L|X)2")>;
+ def : InstRW<[SwiftWrite3xP1TwoCycle],
+ (instregex "VTB(L|X)3")>;
+ def : InstRW<[SwiftWrite4xP1TwoCycle],
+ (instregex "VTB(L|X)4")>;
+
+ // 4.2.33 Advanced SIMD, Transpose
+ def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
+ SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
+ (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
+
+ // 4.2.34 Advanced SIMD and VFP, Floating Point
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
+ def : InstRW<[SwiftWriteP0FourCycle],
+ (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
+ "VPMIN")>;
+ def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
+
+ // 4.2.35 Advanced SIMD and VFP, Multiply
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
+ "VMULL", "VQDMULL")>;
+ def : InstRW<[SwiftWriteP1SixCycle],
+ (instregex "VMULD", "VNMULD")>;
+ def : InstRW<[SwiftWriteP1FourCycle],
+ (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
+ "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
+ def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
+ def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
+
+ // 4.2.36 Advanced SIMD and VFP, Convert
+ def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
+ // Fixed-point conversions.
+ def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
+
+ // 4.2.37 Advanced SIMD and VFP, Move
+ def : InstRW<[SwiftWriteP0TwoCycle],
+ (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
+ "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
+ "FCONST(D|S)")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VQMOVN")>;
+ def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
+ (instregex "VDUP(8|16|32)")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
+ (instregex "VMOVSR$", "VSETLN")>;
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
+ (instregex "VMOVRR(D|S)$")>;
+ def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
+ WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
+ SwiftWriteP1TwoCycle]>],
+ (instregex "VMOVSRR$")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
+ (instregex "VGETLN(u|i)")>;
+ def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
+ SwiftWriteP01OneCycle]>],
+ (instregex "VGETLNs")>;
+
+ // 4.2.38 Advanced SIMD and VFP, Move FPSCR
+ // Serializing instructions.
+ def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 15;
+ let ResourceCycles = [15];
+ }
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMRS")>;
+ def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
+ (instregex "VMSR")>;
+ // Not serializing.
+ def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
+
+ // 4.2.39 Advanced SIMD and VFP, Load Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
+
+ // 4.2.40 Advanced SIMD and VFP, Store Single Element
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
+
+ // 4.2.41 Advanced SIMD and VFP, Load Multiple
+ // 4.2.42 Advanced SIMD and VFP, Store Multiple
+
+ // Resource requirement for permuting, just reserves the resources.
+ foreach Num = 1-28 in {
+ def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+
+ // Pre RA pseudos - load/store to a Q register as a D register pair.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
+
+ // Post-RA not modelled accurately. We assume that a register use of width
+ // 64 bits maps to a D register and one of width 128 bits to a Q register.
+ // Not all the different kinds are accurately represented.
+ def SwiftWriteVLDM : SchedWriteVariant<[
+ // Load of one S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
+ // Load of one D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
+ // Load of 3 S registers.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm3]>,
+ // Load of a Q register (not necessarily true). We should not be mapping to
+ // 4 S registers, either.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
+ SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
+ // Load of 5 S registers.
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm5]>,
+ // Load of 3 D registers. (Must also be able to handle an S-register list,
+ // though this is not accurate.)
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Load of 7 S registers.
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm7]>,
+ // Load of two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>,
+ // Load of 9 S registers.
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 5 D registers.
+ SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
+ // Inaccurate: reuse description from 9 S registers.
+ SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of three Q registers.
+ SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
+ // Inaccurate: reuse description from 9 S registers.
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 7 D registers (inaccurate).
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM10Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
+ SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
+ SwiftVLDMPerm9]>,
+ // Load of 4 Q registers.
+ SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
+ SwiftWriteLM11Cy, SwiftWriteLM14Cy,
+ SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
+ // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
+ SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm2]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
+ (instregex "VLDM[SD](IA|DB)_UPD$")>;
+
+ def SwiftWriteVSTM : SchedWriteVariant<[
+ // One S register.
+ SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
+ // One D register.
+ SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
+ // Three S registers.
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
+ // Assume one Q register.
+ SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
+ SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
+ // Assume three D registers.
+ SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
+ // Assume two Q registers.
+ SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
+ SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
+ // Assume 5 D registers.
+ SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
+ SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
+ // Assume three Q registers.
+ SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
+ SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
+ // Assume 7 D registers.
+ SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
+ SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
+ // Assume four Q registers.
+ SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
+ // Assume two Q registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM3]>
+ ]> { let Variadic = 1; }
+
+ def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
+
+ def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
+ (instregex "VSTM[SD](IA|DB)_UPD")>;
+
+ // 4.2.43 Advanced SIMD, Element or Structure Load and Store
+ def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ }
+ def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
+ let Latency = 4;
+ let ResourceCycles = [3];
+ }
+ foreach Num = 1-2 in {
+ def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+ }
+ // VLDx
+ // Multiple structures.
+ // Single element structure loads.
+ // We assume aligned.
+ // Single/two register.
+ def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+ // Three register.
+ def : InstRW<[SwiftWrite3xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
+ def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
+ // Four register.
+ def : InstRW<[SwiftWrite2xP2FourCy],
+ (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
+ def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
+ (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
+ // Two element structure loads.
+ // Two/four register.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm2],
+ (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
+ SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+ // Four element structure loads.
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
+ SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
+ SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
+ SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+ // Single all/lane loads.
+ // One element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
+ (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
+ "VLD1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
+ SwiftExt1xP0, SwiftVLDMPerm2],
+ (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
+ SwiftVLDMPerm3],
+ (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
+ "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
+ SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
+ SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
+ (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element structure.
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
+ (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
+ "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
+ SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
+ def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
+ SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
+ SwiftVLDMPerm5],
+ (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
+ // VSTx
+ // Multiple structures.
+ // Single element structure store.
+ def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
+ def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
+ (instregex "VST1d(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
+ (instregex "VST1q(8|16|32|64)wb")>;
+ def : InstRW<[SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+ def : InstRW<[SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+ // Two element structure store.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+ // Three element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Four element structure store.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+ // Single/all lane store.
+ // One element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+ // Two element structure.
+ def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Three element structure.
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+ // Four element structure.
+ def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+ def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+ // 4.2.44 VFP, Divide and Square Root
+ def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 17;
+ let ResourceCycles = [1, 15];
+ }
+ def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+ let NumMicroOps = 1;
+ let Latency = 32;
+ let ResourceCycles = [1, 30];
+ }
+ def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
+ def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+
+ // Not specified.
+ def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
+ // Preload.
+ def : WriteRes<WritePreLd, [SwiftUnitP2]> {
+ let Latency = 0;
+ let ResourceCycles = [0];
+ }
+
}
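A rough way to read these SchedWriteRes entries: Latency is the cycle at which a dependent instruction can consume the result, while ResourceCycles says how long each listed unit stays busy. So under SwiftDiv17 a VDIVS or VSQRTS issues a single micro-op to P0 and occupies the divide unit for 15 cycles, delivering its result after 17; the double-precision forms under SwiftDiv32 hold the divider for 30 cycles with a 32-cycle latency. Back-to-back independent divides are therefore throughput-limited by the non-pipelined divide unit, roughly one start every 15 (or 30) cycles, rather than by issue width.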
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 41a7e0c2c8a5..93add6ee33cf 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -26,7 +26,7 @@ ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
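The DebugLoc-to-SDLoc signature changes below track the SelectionDAG API migration: an SDLoc is constructed from the node or value being lowered and carries IR ordering alongside the debug location that DebugLoc alone carried before. A minimal sketch of the pattern, with lowerExample being a hypothetical helper rather than anything in this patch:

    // Hypothetical helper showing how an SDLoc is obtained and forwarded.
    static SDValue lowerExample(SDValue Op, SelectionDAG &DAG) {
      SDLoc dl(Op); // location and order taken from the node being lowered
      return DAG.getNode(ISD::ADD, dl, Op.getValueType(), Op, Op);
    }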
SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
@@ -140,7 +140,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
// GNU library uses (ptr, value, size)
// See RTABI section 4.3.4
SDValue ARMSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 6419a737295a..56c9375855dd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -45,7 +45,7 @@ public:
~ARMSelectionDAGInfo();
virtual
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
@@ -55,7 +55,7 @@ public:
// Adjust parameters for memset, see RTABI section 4.3.4
virtual
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Op1, SDValue Op2,
SDValue Op3, unsigned Align,
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 8653c462f06b..a11629852ad7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -32,20 +32,53 @@ ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
static cl::opt<bool>
-DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
-static cl::opt<bool>
-StrictAlign("arm-strict-align", cl::Hidden,
- cl::desc("Disallow all unaligned memory accesses"));
+enum AlignMode {
+ DefaultAlign,
+ StrictAlign,
+ NoStrictAlign
+};
+
+static cl::opt<AlignMode>
+Align(cl::desc("Load/store alignment support"),
+ cl::Hidden, cl::init(DefaultAlign),
+ cl::values(
+ clEnumValN(DefaultAlign, "arm-default-align",
+ "Generate unaligned accesses only on hardware/OS "
+ "combinations that are known to support them"),
+ clEnumValN(StrictAlign, "arm-strict-align",
+ "Disallow all unaligned memory accesses"),
+ clEnumValN(NoStrictAlign, "arm-no-strict-align",
+ "Allow unaligned memory accesses"),
+ clEnumValEnd));
+
+enum ITMode {
+ DefaultIT,
+ RestrictedIT,
+ NoRestrictedIT
+};
+
+static cl::opt<ITMode>
+IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::ZeroOrMore,
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate IT block based on arch"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow deprecated IT based on ARMv8"),
+ clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
+ "Allow IT blocks based on ARMv7"),
+ clEnumValEnd));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
+ , ARMProcClass(None)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
@@ -60,11 +93,14 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TOps = false;
HasV5TEOps = false;
HasV6Ops = false;
+ HasV6MOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
+ HasV8Ops = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
+ HasFPARMv8 = false;
HasNEON = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
@@ -73,7 +109,6 @@ void ARMSubtarget::initializeEnvironment() {
SlowFPBrcc = false;
InThumbMode = false;
HasThumb2 = false;
- IsMClass = false;
NoARM = false;
PostRAScheduler = false;
IsR9Reserved = ReserveR9;
@@ -90,8 +125,12 @@ void ARMSubtarget::initializeEnvironment() {
AvoidMOVsShifterOperand = false;
HasRAS = false;
HasMPExtension = false;
+ HasVirtualization = false;
FPOnlySP = false;
+ HasPerfMon = false;
HasTrustZone = false;
+ HasCrypto = false;
+ HasCRC = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
@@ -115,8 +154,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
- if (CPUString.empty())
- CPUString = "generic";
+ if (CPUString.empty()) {
+ if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else
+ CPUString = "generic";
+ }
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
@@ -134,7 +178,7 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a
// ARM version or CPU and then remove this.
if (!HasV6T2Ops && hasThumb2())
- HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true;
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -151,21 +195,56 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetIOS())
- UseMovt = hasV6T2Ops();
- else {
+ UseMovt = hasV6T2Ops() && ArmUseMOVT;
+
+ if (!isTargetIOS()) {
+ IsR9Reserved = ReserveR9;
+ } else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
- UseMovt = DarwinUseMOVT && hasV6T2Ops();
SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- if (!StrictAlign && hasV6Ops() && isTargetDarwin())
- AllowsUnalignedMem = true;
+ switch (Align) {
+ case DefaultAlign:
+ // Assume pre-ARMv6 doesn't support unaligned accesses.
+ //
+ // ARMv6 may or may not support unaligned accesses depending on the
+ // SCTLR.U bit, which is architecture-specific. We assume ARMv6
+ // Darwin targets support unaligned accesses, and others don't.
+ //
+ // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
+ // which raises an alignment fault on unaligned accesses. Linux
+ // defaults this bit to 0 and handles it as a system-wide (not
+ // per-process) setting. It is therefore safe to assume that ARMv7+
+ // Linux targets support unaligned accesses. The same goes for NaCl.
+ //
+ // The above behavior is consistent with GCC.
+ AllowsUnalignedMem = (
+ (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) ||
+ (hasV6Ops() && isTargetDarwin()));
+ break;
+ case StrictAlign:
+ AllowsUnalignedMem = false;
+ break;
+ case NoStrictAlign:
+ AllowsUnalignedMem = true;
+ break;
+ }
+
+ switch (IT) {
+ case DefaultIT:
+ RestrictIT = hasV8Ops();
+ break;
+ case RestrictedIT:
+ RestrictIT = true;
+ break;
+ case NoRestrictedIT:
+ RestrictIT = false;
+ break;
+ }
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
uint64_t Bits = getFeatureBits();
@@ -231,12 +310,15 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
return SchedModel->MispredictPenalty;
}
+bool ARMSubtarget::hasSinCos() const {
+ return getTargetTriple().getOS() == Triple::IOS &&
+ !getTargetTriple().isOSVersionLT(7, 0);
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
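Because the AlignMode and ITMode options above are declared without an option name, each clEnumValN value becomes its own hidden flag, so the behavior can be forced from the llc command line (illustrative invocations, assuming the stock llc driver):

    llc -mtriple=armv7-linux-gnueabi -arm-strict-align test.ll    # forbid unaligned accesses
    llc -mtriple=armv6-apple-darwin -arm-no-strict-align test.ll  # force-allow them
    llc -mtriple=armv7-linux-gnueabi -arm-restrict-it test.ll     # ARMv8-style restricted IT blocks

Omitting the flags selects DefaultAlign and DefaultIT, i.e. the per-triple and per-architecture defaults computed in the switches above.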
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 038eb76ae1d2..5276901bbb92 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -31,26 +31,36 @@ class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift,
+ CortexA53, CortexA57
+ };
+ enum ARMProcClassEnum {
+ None, AClass, RClass, MClass
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
- /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+ /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
+ ARMProcClassEnum ARMProcClass;
+
+ /// HasV4TOps, HasV5TOps, HasV5TEOps,
+ /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
+ bool HasV6MOps;
bool HasV6T2Ops;
bool HasV7Ops;
+ bool HasV8Ops;
- /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
bool HasVFPv4;
+ bool HasFPARMv8;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -79,10 +89,6 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
- /// v6m, v7m for example.
- bool IsMClass;
-
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -144,18 +150,37 @@ protected:
/// extension (ARMv7 only).
bool HasMPExtension;
+ /// HasVirtualization - True if the subtarget supports the Virtualization
+ /// extension.
+ bool HasVirtualization;
+
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
+ /// If true, the processor supports the Performance Monitor Extensions. These
+ /// include a generic cycle-counter as well as more fine-grained (often
+ /// implementation-specific) events.
+ bool HasPerfMon;
+
/// HasTrustZone - if true, processor supports TrustZone security extensions
bool HasTrustZone;
+ /// HasCrypto - if true, processor supports Cryptography extensions
+ bool HasCrypto;
+
+ /// HasCRC - if true, processor supports CRC instructions
+ bool HasCRC;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
bool AllowsUnalignedMem;
+ /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
+ /// blocks to conform to ARMv8 rule.
+ bool RestrictIT;
+
/// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
@@ -187,10 +212,6 @@ protected:
public:
enum {
- isELF, isDarwin
- } TargetType;
-
- enum {
ARM_ABI_APCS,
ARM_ABI_AAPCS // ARM EABI
} TargetABI;
@@ -224,8 +245,10 @@ public:
bool hasV5TOps() const { return HasV5TOps; }
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6MOps() const { return HasV6MOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
+ bool hasV8Ops() const { return HasV8Ops; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
@@ -241,7 +264,11 @@ public:
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
bool hasVFP4() const { return HasVFPv4; }
+ bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
+ bool hasCrypto() const { return HasCrypto; }
+ bool hasCRC() const { return HasCRC; }
+ bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -249,11 +276,15 @@ public:
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasAnyDataBarrier() const {
+ return HasDataBarrier || (hasV6Ops() && !isThumb());
+ }
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
@@ -268,12 +299,19 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
- bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NaCl;
- }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetELF() const { return !isTargetDarwin(); }
+ // ARM EABI is the bare-metal EABI described in ARM ABI documents and
+ // can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
+ // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
+ // even for GNUEABI, so we can make a distinction here and still conform to
+ // the EABI on GNU (and Android) mode. This requires change in Clang, too.
+ bool isTargetAEABI() const {
+ return TargetTriple.getEnvironment() == Triple::EABI;
+ }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
@@ -282,8 +320,9 @@ public:
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
- bool isMClass() const { return IsMClass; }
- bool isARClass() const { return !IsMClass; }
+ bool isMClass() const { return ARMProcClass == MClass; }
+ bool isRClass() const { return ARMProcClass == RClass; }
+ bool isAClass() const { return ARMProcClass == AClass; }
bool isR9Reserved() const { return IsR9Reserved; }
@@ -292,9 +331,15 @@ public:
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool restrictIT() const { return RestrictIT; }
+
const std::string & getCPUString() const { return CPUString; }
unsigned getMispredictionPenalty() const;
+
+ /// This function returns true if the target has a sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
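hasSinCos() gives the lowering code a way to merge a sin and a cos of the same argument into one combined libcall when targeting iOS 7.0 or later, whose runtime provides combined entry points (such as __sincosf_stret; the exact symbol is an assumption here, not part of this hunk). Roughly:

    // Illustration only: what the hook enables.
    //   float s = sinf(x);
    //   float c = cosf(x);   // same argument
    // With hasSinCos() returning true, ISel may emit a single combined
    // sin+cos runtime call instead of two separate libcalls.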
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 42c7d2c437e0..c2bf78877875 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -60,7 +60,7 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our ARM pass. This
// allows the ARM pass to delegate to the target independent layer when
// appropriate.
- PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createBasicTargetTransformInfoPass(this));
PM.add(createARMTargetTransformInfoPass(this));
}
@@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget) {
+ initAsmInfo();
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ initAsmInfo();
}
namespace {
@@ -148,7 +150,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
- addPass(createGlobalMergePass(TM->getTargetLowering()));
+ addPass(createGlobalMergePass(TM));
return false;
}
@@ -167,7 +169,7 @@ bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
addPass(createMLxExpansionPass());
// Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
// enabled when NEON is available.
@@ -194,8 +196,13 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only())
+ if (!getARMSubtarget().isThumb1Only()) {
+ // In ARMv8, if-conversion depends on Thumb instruction widths, so run
+ // Thumb2 size reduction first so the widths are final.
+ if (getARMSubtarget().restrictIT() &&
+ !getARMSubtarget().prefers32BitThumb())
+ addPass(createThumb2SizeReductionPass());
addPass(&IfConverterID);
+ }
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index dfdf6ab356a3..7ec71b20c64d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -47,7 +47,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCStreamer &Streamer) const {
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ return MCSymbolRefExpr::Create(getSymbol(*Mang, GV),
MCSymbolRefExpr::VK_ARM_TARGET2,
getContext());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 53ece668c3f5..6bbb38facc24 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -124,11 +124,14 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
- unsigned getAddressComputationCost(Type *Val) const;
+ unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Op1Info = OK_AnyValue,
OperandValueKind Op2Info = OK_AnyValue) const;
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const;
/// @}
};
@@ -182,7 +185,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -192,8 +195,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
- int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
- ISD, LT.second);
+ int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
}
@@ -207,7 +209,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -283,15 +286,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
- array_lengthof(NEONVectorConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONVectorConversionTbl[Idx].Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -314,16 +317,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
- array_lengthof(NEONFloatConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONFloatConversionTbl[Idx].Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -347,16 +349,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
- array_lengthof(NEONIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONIntegerConversionTbl[Idx].Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -368,11 +369,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger()) {
- int Idx =
- ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
- array_lengthof(ARMIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return ARMIntegerConversionTbl[Idx].Cost;
}
@@ -400,7 +398,8 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
@@ -411,12 +410,13 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(CondTy);
EVT SelValTy = TLI->getValueType(ValTy);
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
- array_lengthof(NEONVectorSelectTbl),
- ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorSelectTbl[Idx].Cost;
+ if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+ int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+ }
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
@@ -425,7 +425,16 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+ // Address computations in vectorized code with non-consecutive addresses will
+ // likely result in more instructions compared to scalar code where the
+ // computation can more often be merged into the index mode. The resulting
+ // extra micro-ops can significantly decrease throughput.
+ unsigned NumVectorInstToHideOverhead = 10;
+
+ if (Ty->isVectorTy() && IsComplex)
+ return NumVectorInstToHideOverhead;
+
// In many cases the address computation is not merged into the instruction
// addressing mode.
return 1;
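In effect, a "complex" (non-consecutive) vector address computation is charged 10 units, so the vectorizer only considers such access patterns profitable when enough surrounding vector work amortizes the extra micro-ops; scalar address math, which usually folds into the addressing mode, keeps the nominal cost of 1.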
@@ -437,7 +446,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
if (Kind != SK_Reverse)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a double
// word (vrev) or two if we shuffle a quad word (vrev, vext).
{ ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
@@ -453,8 +462,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
- ISD::VECTOR_SHUFFLE, LT.second);
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -469,7 +477,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT> CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -513,14 +521,37 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
int Idx = -1;
if (ST->hasNEON())
- Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
- LT.second);
+ Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
-
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ unsigned Cost =
+ TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by ISel and have zero cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64 those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
}
+unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ if (Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isDoubleTy()) {
+ // Unaligned loads/stores are extremely inefficient.
+ // We need 4 uops for vst.1/vld.1 vs. 1 uop for vldr/vstr.
+ return LT.first * 4;
+ }
+ return LT.first;
+}
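A worked example of the new hook: a load or store of <2 x double> with 8-byte alignment legalizes to one v2f64 operation (LT.first == 1), so the cost comes back as 1 * 4 = 4, modelling the roughly 4-uop vst1/vld1 sequence; the same access with 16-byte alignment, or any non-f64 vector, returns plain LT.first.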
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 1dd2953b29b9..e3f9e0dc609a 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
+#include "ARMFeatures.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -24,6 +27,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -47,11 +51,33 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
+ ARMTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+ }
+
+ // Unwind directives state
+ SMLoc FnStartLoc;
+ SMLoc CantUnwindLoc;
+ SMLoc PersonalityLoc;
+ SMLoc HandlerDataLoc;
+ int FPReg;
+ void resetUnwindDirectiveParserState() {
+ FnStartLoc = SMLoc();
+ CantUnwindLoc = SMLoc();
+ PersonalityLoc = SMLoc();
+ HandlerDataLoc = SMLoc();
+ FPReg = -1;
+ }
+
// Map of register aliases registers via the .req directive.
StringMap<unsigned> RegisterReqs;
+ bool NextSymbolIsThumb;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -76,7 +102,7 @@ class ARMAsmParser : public MCTargetAsmParser {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
// mark the block as done.
- unsigned TZ = CountTrailingZeros_32(ITState.Mask);
+ unsigned TZ = countTrailingZeros(ITState.Mask);
if (++ITState.CurPosition == 5 - TZ)
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
@@ -113,11 +139,22 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveUnreq(SMLoc L);
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveEabiAttr(SMLoc L);
+ bool parseDirectiveCPU(SMLoc L);
+ bool parseDirectiveFPU(SMLoc L);
+ bool parseDirectiveFnStart(SMLoc L);
+ bool parseDirectiveFnEnd(SMLoc L);
+ bool parseDirectiveCantUnwind(SMLoc L);
+ bool parseDirectivePersonality(SMLoc L);
+ bool parseDirectiveHandlerData(SMLoc L);
+ bool parseDirectiveSetFP(SMLoc L);
+ bool parseDirectivePad(SMLoc L);
+ bool parseDirectiveRegSave(SMLoc L, bool IsVector);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
StringRef &ITMask);
- void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
bool isThumb() const {
@@ -130,12 +167,25 @@ class ARMAsmParser : public MCTargetAsmParser {
bool isThumbTwo() const {
return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
}
+ bool hasThumb() const {
+ return STI.getFeatureBits() & ARM::HasV4TOps;
+ }
bool hasV6Ops() const {
return STI.getFeatureBits() & ARM::HasV6Ops;
}
+ bool hasV6MOps() const {
+ return STI.getFeatureBits() & ARM::HasV6MOps;
+ }
bool hasV7Ops() const {
return STI.getFeatureBits() & ARM::HasV7Ops;
}
+ bool hasV8Ops() const {
+ return STI.getFeatureBits() & ARM::HasV8Ops;
+ }
+ bool hasARM() const {
+ return !(STI.getFeatureBits() & ARM::FeatureNoARM);
+ }
+
void SwitchMode() {
unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
@@ -161,6 +211,8 @@ class ARMAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseMemBarrierOptOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseInstSyncBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseProcIFlagsOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseMSRMaskOperand(
@@ -185,51 +237,19 @@ class ARMAsmParser : public MCTargetAsmParser {
SMLoc &EndLoc);
// Asm Match Converter Methods
- void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdWriteBackRegAddrMode2(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStWriteBackRegAddrMode2(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStWriteBackRegAddrMode3(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdExtTWriteBackImm(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdExtTWriteBackReg(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStExtTWriteBackImm(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStExtTWriteBackReg(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdWriteBackRegAddrMode3(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
void cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVLDwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVLDwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVSTwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVSTwbRegister(MCInst &Inst,
+ void cvtThumbBranches(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool shouldOmitCCOutOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
+ bool shouldOmitPredicateOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
public:
enum ARMMatchResultTy {
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -241,12 +261,13 @@ public:
};
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
- MRI = &getContext().getRegisterInfo();
+ MRI = getContext().getRegisterInfo();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -254,12 +275,7 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
- // Set ELF header flags.
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+ NextSymbolIsThumb = false;
}
// Implementation of the MCTargetAsmParser interface:
@@ -276,6 +292,8 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
+ void onLabelParsed(MCSymbol *Symbol);
+
};
} // end anonymous namespace
@@ -293,6 +311,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_CoprocOption,
k_Immediate,
k_MemBarrierOpt,
+ k_InstSyncBarrierOpt,
k_Memory,
k_PostIndexRegister,
k_MSRMask,
@@ -336,6 +355,10 @@ class ARMOperand : public MCParsedAsmOperand {
ARM_MB::MemBOpt Val;
};
+ struct ISBOptOp {
+ ARM_ISB::InstSyncBOpt Val;
+ };
+
struct IFlagsOp {
ARM_PROC::IFlags Val;
};
@@ -422,6 +445,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct CopOp Cop;
struct CoprocOptionOp CoprocOption;
struct MBOptOp MBOpt;
+ struct ISBOptOp ISBOpt;
struct ITMaskOp ITMask;
struct IFlagsOp IFlags;
struct MMaskOp MMask;
@@ -482,6 +506,8 @@ public:
case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
+ case k_InstSyncBarrierOpt:
+ ISBOpt = o.ISBOpt;
+ break;
case k_Memory:
Memory = o.Memory;
break;
@@ -564,6 +590,11 @@ public:
return MBOpt.Val;
}
+ ARM_ISB::InstSyncBOpt getInstSyncBarrierOpt() const {
+ assert(Kind == k_InstSyncBarrierOpt && "Invalid access!");
+ return ISBOpt.Val;
+ }
+
ARM_PROC::IFlags getProcIFlags() const {
assert(Kind == k_ProcIFlags && "Invalid access!");
return IFlags.Val;
@@ -582,6 +613,56 @@ public:
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
bool isImm() const { return Kind == k_Immediate; }
+ // Checks whether this operand is an unsigned offset which fits in a field
+ // of the specified width, scaled by a specific number of bits (e.g. width=8,
+ // scale=2 accepts multiples of 4 from 0 to 1020).
+ template<unsigned width, unsigned scale>
+ bool isUnsignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << width) - 1);
+ return ((Val % Align) == 0) && (Val >= 0) && (Val <= Max);
+ }
+ return false;
+ }
+ // Checks whether this operand is a signed offset which fits in a field
+ // of the specified width, scaled by a specific number of bits.
+ template<unsigned width, unsigned scale>
+ bool isSignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << (width-1)) - 1);
+ int64_t Min = -Align * (1LL << (width-1));
+ return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max);
+ }
+ return false;
+ }
+
+ // Checks whether this operand is a memory operand computed as an offset
+ // applied to PC. The offset may have 8 bits of magnitude and is represented
+ // with two bits of shift. Textually it may be either [pc, #imm], #imm or a
+ // relocatable expression, e.g. "ldr r0, [pc, #16]", "ldr r0, #16" or
+ // "ldr r0, label".
+ bool isThumbMemPC() const {
+ int64_t Val = 0;
+ if (isImm()) {
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+ Val = CE->getValue();
+ }
+ else if (isMem()) {
+ if (!Memory.OffsetImm || Memory.OffsetRegNum) return false;
+ if (Memory.BaseRegNum != ARM::PC) return false;
+ Val = Memory.OffsetImm->getValue();
+ }
+ else return false;
+ return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020);
+ }
bool isFPImm() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -610,13 +691,6 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
- bool isImm0_4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 5;
- }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -639,6 +713,13 @@ public:
// Explicitly exclude zero; we want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
+ bool isImm0_239() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 240;
+ }
bool isImm0_255() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -800,6 +881,15 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
+ bool isImm256_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 256 && Value < 65536;
+ }
bool isImm0_65535Expr() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -879,7 +969,8 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return ARM_AM::getT2SOImmVal(~Value) != -1;
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(~Value) != -1;
}
bool isT2SOImmNeg() const {
if (!isImm()) return false;
@@ -903,6 +994,7 @@ public:
bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
bool isToken() const { return Kind == k_Token; }
bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
+ bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
bool isMem() const { return Kind == k_Memory; }
bool isShifterImm() const { return Kind == k_ShifterImmediate; }
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
@@ -949,7 +1041,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
- return Val > -4096 && Val < 4096;
+ return (Val == INT32_MIN) || (Val > -4096 && Val < 4096);
}
bool isAddrMode3() const {
// If we have an immediate that's not a constant, treat it as a label
@@ -1659,6 +1751,37 @@ public:
Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
+ void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const {
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() >> 2));
+ return;
+ }
+
+ const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
+ assert(SR && "Unknown value type!");
+ Inst.addOperand(MCOperand::CreateExpr(SR));
+ }
+
+ void addThumbMemPCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (CE) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ return;
+ }
+
+ const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
+ assert(SR && "Unknown value type!");
+ Inst.addOperand(MCOperand::CreateExpr(SR));
+ return;
+ }
+
+ assert(isMem() && "Unknown value type!");
+ assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!");
+ Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue()));
+ }
+
void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a so_imm, but we have its bitwise
@@ -1680,6 +1803,11 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
}
+ void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getInstSyncBarrierOpt())));
+ }
+
void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -1688,8 +1816,6 @@ public:
void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int32_t Imm = Memory.OffsetImm->getValue();
- // FIXME: Handle #-0
- if (Imm == INT32_MIN) Imm = 0;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
@@ -2228,21 +2354,24 @@ public:
}
static ARMOperand *
- CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
+ assert(Regs.size() > 0 && "RegList contains no registers?");
KindTy Kind = k_RegisterList;
- if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first))
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second))
Kind = k_DPRRegisterList;
else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
- contains(Regs.front().first))
+ contains(Regs.front().second))
Kind = k_SPRRegisterList;
+ // Sort based on the register encoding values.
+ array_pod_sort(Regs.begin(), Regs.end());
+
ARMOperand *Op = new ARMOperand(Kind);
- for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator
I = Regs.begin(), E = Regs.end(); I != E; ++I)
- Op->Registers.push_back(I->first);
- array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->Registers.push_back(I->second);
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
return Op;
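Since the list is built as (encoding, register) pairs and sorted on the encoding value, the operands stored on the ARMOperand come out in encoding order regardless of how the source wrote them; for instance a list written as {r2, r0, r1} is stored as r0, r1, r2, matching the order the LDM/STM and VFP load/store-multiple register masks encode. Sorting on encodings rather than enum values is what makes this safe for register classes whose enum order need not match encoding order, which is presumably why the old comment about VFP enum order was dropped.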
@@ -2345,6 +2474,15 @@ public:
return Op;
}
+ static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt,
+ SMLoc S) {
+ ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt);
+ Op->ISBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
ARMOperand *Op = new ARMOperand(k_ProcIFlags);
Op->IFlags.Val = IFlags;
@@ -2397,7 +2535,10 @@ void ARMOperand::print(raw_ostream &OS) const {
getImm()->print(OS);
break;
case k_MemBarrierOpt:
- OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">";
+ break;
+ case k_InstSyncBarrierOpt:
+ OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">";
break;
case k_Memory:
OS << "<memory "
@@ -2731,8 +2872,9 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
switch (Name[2]) {
default: return -1;
- case '0': return 10;
- case '1': return 11;
+ // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
+ case '0': return CoprocOp == 'p'? -1: 10;
+ case '1': return CoprocOp == 'p'? -1: 11;
case '2': return 12;
case '3': return 13;
case '4': return 14;
@@ -2910,12 +3052,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The reglist instructions have at most 16 registers, so reserve
// space for that many.
- SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+ int EReg = 0;
+ SmallVector<std::pair<unsigned, unsigned>, 16> Registers;
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
++Reg;
}
const MCRegisterClass *RC;
@@ -2929,7 +3073,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(RegLoc, "invalid register in register list");
// Store the register.
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
// This starts immediately after the first register token in the list,
// so we can see either a comma or a minus (range separator) as a legal
@@ -2959,7 +3104,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Add all the registers in the range to the register list.
while (Reg != EndReg) {
Reg = getNextRegister(Reg);
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
}
continue;
}
@@ -2992,14 +3138,15 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
}
// VFP register lists must also be contiguous.
- // It's OK to use the enumeration values directly here rather, as the
- // VFP register classes have the enum sorted properly.
if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
- if (isQReg)
- Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (isQReg) {
+ EReg = MRI->getEncodingValue(++Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ }
}
if (Parser.getTok().isNot(AsmToken::RCurly))
@@ -3036,7 +3183,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
// There's an optional '#' token here. Normally there wouldn't be, but
// inline assemble puts one in, and it's friendly to accept that.
if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat the '#'
+ Parser.Lex(); // Eat '#' or '$'.
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
@@ -3334,18 +3481,27 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
.Case("sy", ARM_MB::SY)
.Case("st", ARM_MB::ST)
+ .Case("ld", ARM_MB::LD)
.Case("sh", ARM_MB::ISH)
.Case("ish", ARM_MB::ISH)
.Case("shst", ARM_MB::ISHST)
.Case("ishst", ARM_MB::ISHST)
+ .Case("ishld", ARM_MB::ISHLD)
.Case("nsh", ARM_MB::NSH)
.Case("un", ARM_MB::NSH)
.Case("nshst", ARM_MB::NSHST)
+ .Case("nshld", ARM_MB::NSHLD)
.Case("unst", ARM_MB::NSHST)
.Case("osh", ARM_MB::OSH)
.Case("oshst", ARM_MB::OSHST)
+ .Case("oshld", ARM_MB::OSHLD)
.Default(~0U);
+ // ishld, oshld, nshld and ld are only available from ARMv8.
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
+ Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD))
+ Opt = ~0U;
+
if (Opt == ~0U)
return MatchOperand_NoMatch;
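The newly accepted options correspond to the ARMv8 load-barrier variants, e.g. (illustrative assembly):

    dmb ishld   @ inner-shareable load barrier (ARMv8 only)
    dmb ld      @ full-system load barrier (ARMv8 only)
    dmb ish     @ still accepted on ARMv7 as before

On a pre-v8 subtarget the ld forms fall through to MatchOperand_NoMatch, as the check above enforces.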
@@ -3354,7 +3510,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Tok.is(AsmToken::Dollar) ||
Tok.is(AsmToken::Integer)) {
if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
@@ -3383,6 +3539,57 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
+/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ if (OptStr.equals_lower("sy"))
+ Opt = ARM_ISB::SY;
+ else
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat '#' or '$'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *ISBarrierID;
+ if (getParser().parseExpression(ISBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_ISB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt(
+ (ARM_ISB::InstSyncBOpt)Opt, S));
+ return MatchOperand_Success;
+}
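Illustrative forms the new ISB parser accepts (the bare "isb" default operand is handled by the matcher, not here):

    isb sy    @ named form; full-system instruction synchronization barrier
    isb #15   @ numeric form; 4-bit option field, 15 is the encoding of sy
    isb #2    @ reserved encoding, accepted as a raw option value

Values outside 0-15 are rejected with "immediate value out of range", per the check above.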
+
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -3602,7 +3809,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
- int Val = StringSwitch<int>(Tok.getString())
+ int Val = StringSwitch<int>(Tok.getString().lower())
.Case("be", 1)
.Case("le", 0)
.Default(-1);
@@ -3875,7 +4082,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Do immediates first, as we always parse those if we have a '#'.
if (Parser.getTok().is(AsmToken::Hash) ||
Parser.getTok().is(AsmToken::Dollar)) {
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
@@ -3926,260 +4133,9 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-/// cvtT2LdrdPre - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtT2LdrdPre(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Rt, Rt2
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateReg(0));
- // addr
- ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtT2StrdPre - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtT2StrdPre(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateReg(0));
- // Rt, Rt2
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- // addr
- ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
-
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
-
- ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdWriteBackRegAddrMode2(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
-
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
-
- ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
-
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
-
- ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-
-/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStWriteBackRegAddrMode2(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStWriteBackRegAddrMode3(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdExtTWriteBackImm(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Rt
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // addr
- ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
- // offset
- ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdExtTWriteBackReg(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Rt
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // addr
- ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
- // offset
- ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStExtTWriteBackImm(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Rt
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- // addr
- ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
- // offset
- ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStExtTWriteBackReg(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Rt
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- // addr
- ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
- // offset
- ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdrdPre - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdrdPre(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Rt, Rt2
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // addr
- ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtStrdPre - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtStrdPre(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Rt, Rt2
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- // addr
- ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
-void ARMAsmParser::
-cvtLdWriteBackRegAddrMode3(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-/// cvtThumbMultiply - Convert parsed operands to MCInst.
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands
-/// when they refer multiple MIOperands inside a single one.
+/// Convert parsed operands to MCInst. Needed here because this instruction
+/// only has two register operands, but multiplication is commutative so
+/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN".
void ARMAsmParser::
cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -4198,59 +4154,62 @@ cvtThumbMultiply(MCInst &Inst,
}
void ARMAsmParser::
-cvtVLDwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Vd
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
+cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ int CondOp = -1, ImmOp = -1;
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc: CondOp = 1; ImmOp = 2; break;
-void ARMAsmParser::
-cvtVLDwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Vd
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vm
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
+ case ARM::t2B:
+ case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break;
-void ARMAsmParser::
-cvtVSTwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vt
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-void ARMAsmParser::
-cvtVSTwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vm
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- // Vt
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ default: llvm_unreachable("Unexpected instruction in cvtThumbBranches");
+ }
+ // First decide whether the branch should be conditional
+ // by looking at its location relative to an IT block.
+ if(inITBlock()) {
+ // Inside an IT block we cannot have any conditional branches. Any
+ // such instruction needs to be converted to unconditional form.
+ switch(Inst.getOpcode()) {
+ case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
+ case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break;
+ }
+ } else {
+ // Outside IT blocks we can only have unconditional branches with AL
+ // condition code or conditional branches with non-AL condition code.
+ unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ break;
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
+ break;
+ }
+ }
+
+ // Now decide on the encoding size based on the branch target range.
+ switch(Inst.getOpcode()) {
+ // classify tB as either t2B or t1B based on range of immediate operand
+ case ARM::tB: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<11, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ }
+ // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
+ case ARM::tBcc: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ }
+ }
+ ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
+ ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
}
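For reference, a sketch of what a check like isSignedOffset<11, 1>() must establish before the narrow encoding is kept (an illustrative helper, not the real ARMOperand method): the target has to be a Scale-aligned signed immediate representable in Bits bits.

    template <unsigned Bits, unsigned Scale>
    static bool fitsSignedOffset(int64_t Val) {
      int64_t HalfSpan = int64_t(1) << (Bits - 1 + Scale); // 2048 for <11,1>
      int64_t Min = -HalfSpan;                             // -2048
      int64_t Max = HalfSpan - (int64_t(1) << Scale);      // +2046
      int64_t AlignMask = (int64_t(1) << Scale) - 1;       // halfword aligned
      return Val >= Min && Val <= Max && (Val & AlignMask) == 0;
    }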
/// Parse an ARM memory expression; return false on success, true otherwise.
@@ -4354,7 +4313,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -4536,7 +4495,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
TyOp->getToken() != ".f64"))
return MatchOperand_NoMatch;
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
// Handle negation, as that still comes through as a separate token.
bool isNegative = false;
@@ -4752,11 +4711,14 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
- Mnemonic == "vaclt" || Mnemonic == "vacle" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
- Mnemonic == "fmuls")
+ Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" ||
+ Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
+ Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
+ Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel"))
return Mnemonic;
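These mnemonics are whitelisted because their trailing letters happen to spell a condition code, so the generic suffix-splitting below would mangle them. A hypothetical illustration:

    // A naive two-letter suffix check cannot tell a real predicate from
    // letters that are simply part of the mnemonic:
    //   "addle" -> ("add", LE)   correct: a predicated ADD
    //   "vacle" -> ("vac", LE)   wrong: "vacle" is a complete NEON mnemonic
    //   "hlt"   -> ("h",   LT)   wrong: "hlt" is the halt instruction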
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -4836,8 +4798,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
- bool &CanAcceptPredicationCode) {
+getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
Mnemonic == "add" || Mnemonic == "adc" ||
@@ -4853,28 +4815,35 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
} else
CanAcceptCarrySet = false;
- if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
- Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
- Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
- Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
- Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" ||
- (Mnemonic == "clrex" && !isThumb()) ||
- (Mnemonic == "nop" && isThumbOne()) ||
- ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" ||
- Mnemonic == "ldc2" || Mnemonic == "ldc2l" ||
- Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) ||
- ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) &&
- !isThumb()) ||
- Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) {
+ if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
+ Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") ||
+ Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") ||
+ Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
+ Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
+ Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
+ Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+ Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
+ (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
+ // These mnemonics are never predicable
CanAcceptPredicationCode = false;
+ } else if (!isThumb()) {
+ // Some instructions are only predicable in Thumb mode
+ CanAcceptPredicationCode
+ = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
+ Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
+ Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
+ Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
+ Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
+ Mnemonic != "stc2" && Mnemonic != "stc2l" &&
+ !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
+ } else if (isThumbOne()) {
+ if (hasV6MOps())
+ CanAcceptPredicationCode = Mnemonic != "movs";
+ else
+ CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
} else
CanAcceptPredicationCode = true;
-
- if (isThumb()) {
- if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
- Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
- CanAcceptPredicationCode = false;
- }
}
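A minimal usage sketch from inside the parser; the expected results follow from the rules above ("clrex", for instance, only has a predicable encoding in Thumb):

    bool CanAcceptCarrySet, CanAcceptPredicationCode;
    getMnemonicAcceptInfo("clrex", "clrex", CanAcceptCarrySet,
                          CanAcceptPredicationCode);
    // ARM mode:   CanAcceptPredicationCode == false
    // Thumb mode: CanAcceptPredicationCode == true (predicable via IT)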
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
@@ -4929,15 +4898,6 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[5])->isImm()) {
// Nest conditions rather than one big 'if' statement for readability.
//
- // If either register is a high reg, it's either one of the SP
- // variants (handled above) or a 32-bit encoding, so we just
- // check against T3. If the second register is the PC, this is an
- // alternate form of ADR, which uses encoding T4, so check for that too.
- if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
- static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
- static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
- return false;
// If both registers are low, we're in an IT block, and the immediate is
// in range, we should use encoding T1 instead, which has a cc_out.
if (inITBlock() &&
@@ -4945,6 +4905,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
static_cast<ARMOperand*>(Operands[5])->isImm0_7())
return false;
+ // Check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
+ if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
// Otherwise, we use encoding T4, which does not have a cc_out
// operand.
@@ -5007,6 +4972,26 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
return false;
}
+bool ARMAsmParser::shouldOmitPredicateOperand(
+ StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
+ unsigned RegIdx = 3;
+ if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
+ static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
+ if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
+ static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+ RegIdx = 4;
+
+ if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
+ (ARMMCRegisterClasses[ARM::DPRRegClassID]
+ .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
+ ARMMCRegisterClasses[ARM::QPRRegClassID]
+ .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+ return true;
+ }
+ return false;
+}
+
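A hedged illustration of the ambiguity this resolves: the same mnemonic and type suffix name either a predicable VFP instruction or an unpredicated NEON one, depending only on the register class.

    //   vrintz.f32 s0, s1    VFP form: predicable, keep the parsed predicate
    //   vrintz.f32 d0, d1    NEON form: unpredicated, drop the operand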
static bool isDataTypeToken(StringRef Tok) {
return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -5102,7 +5087,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
- getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
@@ -5153,7 +5138,17 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
continue;
- if (ExtraToken != ".n") {
+ // For ARM mode, generate an error if the .n qualifier is used.
+ if (ExtraToken == ".n" && !isThumb()) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ return Error(Loc, "instruction with .n (narrow) qualifier not allowed in "
+ "arm mode");
+ }
+
+ // The .n qualifier is always discarded as that is what the tables
+ // and matcher expect. In ARM mode the .w qualifier has no effect,
+ // so discard it to avoid errors that can be caused by the matcher.
+ if (ExtraToken != ".n" && (isThumb() || ExtraToken != ".w")) {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
}
@@ -5199,6 +5194,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
delete Op;
}
+ // Some instructions have the same mnemonic, but don't always
+ // have a predicate. Distinguish them here and delete the
+ // predicate if needed.
+ if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op;
+ }
+
// ARM mode 'blx' need special handling, as the register operand version
// is predicable, but the label operand version is not. So, we can't rely
// on the Mnemonic based checking to correctly figure out when to put
@@ -5218,8 +5222,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
if (!isThumb() && Operands.size() > 4 &&
- (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
- bool isLoad = (Mnemonic == "ldrexd");
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+ Mnemonic == "stlexd")) {
+ bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
unsigned Idx = isLoad ? 2 : 3;
ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
@@ -5250,6 +5255,26 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
+ // FIXME: As said above, this is all a pretty gross hack. This instruction
+ // does not fit cleanly with the other "subs" forms in tblgen.
+ // Adjust operands of B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction
+ // so the Mnemonic is the original name "subs" and delete the predicate
+ // operand so it will match the table entry.
+ if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR &&
+ static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]);
+ Operands.erase(Operands.begin());
+ delete Op0;
+ Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc));
+
+ ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete Op1;
+ }
return false;
}
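Concretely, the "subs" special case above exists so that the exception-return form matches its dedicated table entry; the rewrite restores the full spelling as the mnemonic token and removes the predicate operand that entry doesn't have. For example:

    //   subs pc, lr, #4    @ B9.3.19 SUBS PC, LR, #imm: exception return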
@@ -5283,45 +5308,44 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
return false;
}
-// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
-// the ARMInsts array) instead. Getting that here requires awkward
-// API changes, though. Better way?
-namespace llvm {
-extern const MCInstrDesc ARMInsts[];
-}
-static const MCInstrDesc &getInstDesc(unsigned Opcode) {
- return ARMInsts[Opcode];
+// Return true if the instruction has the interesting property of being
+// allowed in IT blocks, but not being predicable.
+static bool instIsBreakpoint(const MCInst &Inst) {
+ return Inst.getOpcode() == ARM::tBKPT ||
+ Inst.getOpcode() == ARM::BKPT ||
+ Inst.getOpcode() == ARM::tHLT ||
+ Inst.getOpcode() == ARM::HLT;
}
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
+
// Check the IT block state first.
- // NOTE: BKPT instruction has the interesting property of being
- // allowed in IT blocks, but not being predicable. It just always
- // executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
- Inst.getOpcode() != ARM::BKPT) {
- unsigned bit = 1;
+ // NOTE: BKPT and HLT instructions have the interesting property of being
+ // allowed in IT blocks, but not being predicable. They just always execute.
+ if (inITBlock() && !instIsBreakpoint(Inst)) {
+ unsigned Bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
else
- bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
// The instruction must be predicable.
if (!MCID.isPredicable())
return Error(Loc, "instructions in IT block must be predicable");
unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
- unsigned ITCond = bit ? ITState.Cond :
+ unsigned ITCond = Bit ? ITState.Cond :
ARMCC::getOppositeCondition(ITState.Cond);
if (Cond != ITCond) {
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
- for (unsigned i = 1; i < Operands.size(); ++i)
- if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
- CondLoc = Operands[i]->getStartLoc();
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+ CondLoc = Operands[I]->getStartLoc();
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
"', but expected '" +
@@ -5330,20 +5354,55 @@ validateInstruction(MCInst &Inst,
// Check for non-'al' condition codes outside of the IT block.
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
- ARMCC::AL && Inst.getOpcode() != ARM::tB &&
- Inst.getOpcode() != ARM::t2B)
+ ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
+ Inst.getOpcode() != ARM::t2Bcc)
return Error(Loc, "predicated instructions must be in IT block");
- switch (Inst.getOpcode()) {
+ const unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST: {
+ const unsigned RtReg = Inst.getOperand(0).getReg();
+
+ // Rt can't be R14.
+ if (RtReg == ARM::LR)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt can't be R14");
+
+ const unsigned Rt = MRI->getEncodingValue(RtReg);
+ // Rt must be even-numbered.
+ if ((Rt & 1) == 1)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt must be even-numbered");
+
// Rt2 must be Rt + 1.
- unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
- unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
+
+ if (Opcode == ARM::LDRD_PRE || Opcode == ARM::LDRD_POST) {
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(3).getReg());
+ // For addressing modes with writeback, the base register needs to be
+ // different from the destination registers.
+ if (Rn == Rt || Rn == Rt2)
+ return Error(Operands[3]->getStartLoc(),
+ "base register needs to be different from destination "
+ "registers");
+ }
+
+ return false;
+ }
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRD_PRE:
+ case ARM::t2LDRD_POST: {
+ // Rt2 must be different from Rt.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 == Rt)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands can't be identical");
return false;
}
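A compact restatement of the ARM-mode LDRD register rules enforced above, as a sketch over encoding values (an illustrative helper, not parser code):

    static bool isValidARMLdrdPair(unsigned Rt, unsigned Rt2) {
      return Rt != 14 &&        // Rt can't be LR (r14)
             (Rt & 1) == 0 &&   // Rt must be even-numbered
             Rt2 == Rt + 1;     // Rt2 must be the next register up
    }
    // The Thumb2 forms (t2LDRD*) only require Rt2 != Rt; the even/sequential
    // pairing rule is ARM-mode only, as the cases above show.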
case ARM::STRD: {
@@ -5367,50 +5426,77 @@ validateInstruction(MCInst &Inst,
}
case ARM::SBFX:
case ARM::UBFX: {
- // width must be in range [1, 32-lsb]
- unsigned lsb = Inst.getOperand(2).getImm();
- unsigned widthm1 = Inst.getOperand(3).getImm();
- if (widthm1 >= 32 - lsb)
+ // Width must be in range [1, 32-lsb].
+ unsigned LSB = Inst.getOperand(2).getImm();
+ unsigned Widthm1 = Inst.getOperand(3).getImm();
+ if (Widthm1 >= 32 - LSB)
return Error(Operands[5]->getStartLoc(),
"bitfield width must be in range [1,32-lsb]");
return false;
}
+ // Notionally handles ARM::tLDMIA_UPD too.
case ARM::tLDMIA: {
// If we're parsing Thumb2, the .w variant is available and handles
- // most cases that are normally illegal for a Thumb1 LDM
- // instruction. We'll make the transformation in processInstruction()
- // if necessary.
+ // most cases that are normally illegal for a Thumb1 LDM instruction.
+ // We'll make the transformation in processInstruction() if necessary.
//
// Thumb LDM instructions are writeback iff the base register is not
// in the register list.
unsigned Rn = Inst.getOperand(0).getReg();
- bool hasWritebackToken =
+ bool HasWritebackToken =
(static_cast<ARMOperand*>(Operands[3])->isToken() &&
static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
- return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
"registers must be in range r0-r7");
// If we should have writeback, then there should be a '!' token.
- if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ if (!ListContainsBase && !HasWritebackToken && !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"writeback operator '!' expected");
// If we should not have writeback, there must not be a '!'. This is
// true even for the 32-bit wide encodings.
- if (listContainsBase && hasWritebackToken)
+ if (ListContainsBase && HasWritebackToken)
return Error(Operands[3]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
break;
}
- case ARM::t2LDMIA_UPD: {
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ // ARM variants loading and updating the same register are only officially
+ // UNPREDICTABLE on v7 upwards. Goodness knows what they did before.
+ if (!hasV7Ops())
+ break;
+ // Fallthrough
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
- return Error(Operands[4]->getStartLoc(),
- "writeback operator '!' not allowed when base register "
- "in register list");
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
break;
}
+ case ARM::sysLDMIA_UPD:
+ case ARM::sysLDMDA_UPD:
+ case ARM::sysLDMDB_UPD:
+ case ARM::sysLDMIB_UPD:
+ if (!listContainsReg(Inst, 3, ARM::PC))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback register only allowed on system LDM "
+ "if PC in register-list");
+ break;
+ case ARM::sysSTMIA_UPD:
+ case ARM::sysSTMDA_UPD:
+ case ARM::sysSTMDB_UPD:
+ case ARM::sysSTMIB_UPD:
+ return Error(Operands[2]->getStartLoc(),
+ "system STM cannot have writeback register");
+ break;
case ARM::tMUL: {
// The second source operand must be the same register as the destination
// operand.
@@ -5434,26 +5520,35 @@ validateInstruction(MCInst &Inst,
// so only issue a diagnostic for thumb1. The instructions will be
// switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
}
case ARM::tSTMIA_UPD: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ bool ListContainsBase, InvalidLowList;
+ InvalidLowList = checkLowRegisterList(Inst, 4, Inst.getOperand(0).getReg(),
+ 0, ListContainsBase);
+ if (InvalidLowList && !isThumbTwo())
return Error(Operands[4]->getStartLoc(),
"registers must be in range r0-r7");
+
+ // This would be converted to a 32-bit stm, but that's not valid if the
+ // writeback register is in the list.
+ if (InvalidLowList && ListContainsBase)
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
break;
}
case ARM::tADDrSP: {
@@ -5466,6 +5561,28 @@ validateInstruction(MCInst &Inst,
}
break;
}
+ // Final range checking for Thumb unconditional branch instructions.
+ case ARM::tB:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2B: {
+ int op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+ return Error(Operands[op]->getStartLoc(), "branch target out of range");
+ break;
+ }
+ // Final range checking for Thumb conditional branch instructions.
+ case ARM::tBcc:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2Bcc: {
+ int Op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ return Error(Operands[Op]->getStartLoc(), "branch target out of range");
+ break;
+ }
}
return false;
@@ -5749,7 +5866,9 @@ processInstruction(MCInst &Inst,
case ARM::t2LDRpcrel:
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
- Inst.getOperand(1).getImm() <= 0xff)
+ Inst.getOperand(1).getImm() <= 0xff &&
+ !(static_cast<ARMOperand*>(Operands[2])->isToken() &&
+ static_cast<ARMOperand*>(Operands[2])->getToken() == ".w"))
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -7398,11 +7517,10 @@ processInstruction(MCInst &Inst,
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
unsigned OrigMask = Mask;
- unsigned TZ = CountTrailingZeros_32(Mask);
+ unsigned TZ = countTrailingZeros(Mask);
if ((Inst.getOperand(0).getImm() & 1) == 0) {
assert(Mask && TZ <= 3 && "illegal IT mask value!");
- for (unsigned i = 3; i != TZ; --i)
- Mask ^= 1 << i;
+ Mask ^= (0xE << TZ) & 0xF;
}
MO.setImm(Mask);
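The closed form replacing the loop can be checked exhaustively for the four legal trailing-zero counts; a quick scratch-test sketch (both sides compute the set of mask bits to flip, so comparing those sets suffices):

    #include <cassert>
    int main() {
      for (unsigned TZ = 0; TZ <= 3; ++TZ) {
        unsigned LoopMask = 0;
        for (unsigned i = 3; i != TZ; --i)
          LoopMask ^= 1u << i;                     // old formulation
        unsigned ClosedForm = (0xEu << TZ) & 0xFu; // new formulation
        assert(LoopMask == ClosedForm);
      }
    }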
@@ -7503,7 +7621,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = MII.get(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -7564,12 +7682,22 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
- // Some instructions need post-processing to, for example, tweak which
- // encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other. E.g.,
- // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
- while (processInstruction(Inst, Operands))
- ;
+ { // processInstruction() updates the inITBlock state, so save it first.
+ bool wasInITBlock = inITBlock();
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
+
+ // Only after the instruction is fully processed can we validate it.
+ if (wasInITBlock && hasV8Ops() && isThumb() &&
+ !isV8EligibleForIT(&Inst, 2)) {
+ Warning(IDLoc, "deprecated instruction in IT block");
+ }
+ }
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
@@ -7622,15 +7750,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
- case Match_ImmRange0_4: {
+ case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
}
- case Match_ImmRange0_15: {
+ case Match_ImmRange0_239: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
}
@@ -7658,6 +7786,28 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
+ else if (IDVal == ".fnstart")
+ return parseDirectiveFnStart(DirectiveID.getLoc());
+ else if (IDVal == ".fnend")
+ return parseDirectiveFnEnd(DirectiveID.getLoc());
+ else if (IDVal == ".cantunwind")
+ return parseDirectiveCantUnwind(DirectiveID.getLoc());
+ else if (IDVal == ".personality")
+ return parseDirectivePersonality(DirectiveID.getLoc());
+ else if (IDVal == ".handlerdata")
+ return parseDirectiveHandlerData(DirectiveID.getLoc());
+ else if (IDVal == ".setfp")
+ return parseDirectiveSetFP(DirectiveID.getLoc());
+ else if (IDVal == ".pad")
+ return parseDirectivePad(DirectiveID.getLoc());
+ else if (IDVal == ".save")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), false);
+ else if (IDVal == ".vsave")
+ return parseDirectiveRegSave(DirectiveID.getLoc(), true);
return true;
}
@@ -7693,6 +7843,9 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
+
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
@@ -7706,19 +7859,27 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
+
if (isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ if (NextSymbolIsThumb) {
+ getParser().getStreamer().EmitThumbFunc(Symbol);
+ NextSymbolIsThumb = false;
+ }
+}
+
/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
- const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI.hasSubsectionsViaSymbols();
- StringRef Name;
- bool needFuncName = true;
+ const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
+ bool isMachO = MAI->hasSubsectionsViaSymbols();
// Darwin asm can optionally have the function name after the .thumb_func
// directive; ELF asm doesn't.
@@ -7727,29 +7888,19 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
if (Tok.isNot(AsmToken::EndOfStatement)) {
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getIdentifier();
+ MCSymbol *Func =
+ getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier());
+ getParser().getStreamer().EmitThumbFunc(Func);
Parser.Lex(); // Consume the identifier token.
- needFuncName = false;
+ return false;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- // Eat the end of statement and any blank lines that follow.
- while (getLexer().is(AsmToken::EndOfStatement))
- Parser.Lex();
-
- // FIXME: assuming function name will be the line following .thumb_func
- // We really should be checking the next symbol definition even if there's
- // stuff in between.
- if (needFuncName) {
- Name = Parser.getTok().getIdentifier();
- }
+ NextSymbolIsThumb = true;
- // Mark symbol as a thumb symbol.
- MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
- getParser().getStreamer().EmitThumbFunc(Func);
return false;
}
@@ -7795,10 +7946,16 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
Parser.Lex();
if (Val == 16) {
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
+
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
} else {
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
+
if (isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
@@ -7855,7 +8012,267 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// parseDirectiveEabiAttr
/// ::= .eabi_attribute int, int
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
- return true;
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Tag = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat tag integer
+
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ return Error(L, "comma expected");
+ Parser.Lex(); // skip comma
+
+ L = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Value = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat value integer
+
+ getTargetStreamer().emitAttribute(Tag, Value);
+ return false;
+}
+
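A usage sketch; the tag/value pair follows the ARM EABI build-attributes list, but treat the specific numbers as illustrative:

    //   .eabi_attribute 6, 10    @ Tag_CPU_arch (6) = ARM v7 (10)
    // parses to Tag = 6, Value = 10 and is forwarded as:
    //   getTargetStreamer().emitAttribute(6, 10);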
+/// parseDirectiveCPU
+/// ::= .cpu str
+bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
+ StringRef CPU = getParser().parseStringToEndOfStatement().trim();
+ getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+ return false;
+}
+
+/// parseDirectiveFPU
+/// ::= .fpu str
+bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ StringRef FPU = getParser().parseStringToEndOfStatement().trim();
+
+ unsigned ID = StringSwitch<unsigned>(FPU)
+#define ARM_FPU_NAME(NAME, ID) .Case(NAME, ARM::ID)
+#include "ARMFPUName.def"
+ .Default(ARM::INVALID_FPU);
+
+ if (ID == ARM::INVALID_FPU)
+ return Error(L, "Unknown FPU name");
+
+ getTargetStreamer().emitFPU(ID);
+ return false;
+}
+
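The #include above is an X-macro expansion: ARMFPUName.def supplies one ARM_FPU_NAME(NAME, ID) entry per FPU, each becoming a .Case line. With a hypothetical two-entry .def file it would expand to:

    unsigned ID = StringSwitch<unsigned>(FPU)
                      .Case("vfpv4", ARM::VFPV4) // ARM_FPU_NAME("vfpv4", VFPV4)
                      .Case("neon", ARM::NEON)   // ARM_FPU_NAME("neon", NEON)
                      .Default(ARM::INVALID_FPU);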
+/// parseDirectiveFnStart
+/// ::= .fnstart
+bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
+ if (FnStartLoc.isValid()) {
+ Error(L, ".fnstart starts before the end of previous one");
+ Error(FnStartLoc, "previous .fnstart starts here");
+ return true;
+ }
+
+ FnStartLoc = L;
+ getTargetStreamer().emitFnStart();
+ return false;
+}
+
+/// parseDirectiveFnEnd
+/// ::= .fnend
+bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .fnend directive");
+
+ // Reset the unwind directives parser state
+ resetUnwindDirectiveParserState();
+ getTargetStreamer().emitFnEnd();
+ return false;
+}
+
+/// parseDirectiveCantUnwind
+/// ::= .cantunwind
+bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
+ // Check the ordering of unwind directives
+ CantUnwindLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .cantunwind directive");
+ if (HandlerDataLoc.isValid()) {
+ Error(L, ".cantunwind can't be used with .handlerdata directive");
+ Error(HandlerDataLoc, ".handlerdata was specified here");
+ return true;
+ }
+ if (PersonalityLoc.isValid()) {
+ Error(L, ".cantunwind can't be used with .personality directive");
+ Error(PersonalityLoc, ".personality was specified here");
+ return true;
+ }
+
+ getTargetStreamer().emitCantUnwind();
+ return false;
+}
+
+/// parseDirectivePersonality
+/// ::= .personality name
+bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
+ // Check the ordering of unwind directives
+ PersonalityLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .personality directive");
+ if (CantUnwindLoc.isValid()) {
+ Error(L, ".personality can't be used with .cantunwind directive");
+ Error(CantUnwindLoc, ".cantunwind was specified here");
+ return true;
+ }
+ if (HandlerDataLoc.isValid()) {
+ Error(L, ".personality must precede .handlerdata directive");
+ Error(HandlerDataLoc, ".handlerdata was specified here");
+ return true;
+ }
+
+ // Parse the name of the personality routine
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.eatToEndOfStatement();
+ return Error(L, "unexpected input in .personality directive.");
+ }
+ StringRef Name(Parser.getTok().getIdentifier());
+ Parser.Lex();
+
+ MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name);
+ getTargetStreamer().emitPersonality(PR);
+ return false;
+}
+
+/// parseDirectiveHandlerData
+/// ::= .handlerdata
+bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
+ // Check the ordering of unwind directives
+ HandlerDataLoc = L;
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .personality directive");
+ if (CantUnwindLoc.isValid()) {
+ Error(L, ".handlerdata can't be used with .cantunwind directive");
+ Error(CantUnwindLoc, ".cantunwind was specified here");
+ return true;
+ }
+
+ getTargetStreamer().emitHandlerData();
+ return false;
+}
+
+/// parseDirectiveSetFP
+/// ::= .setfp fpreg, spreg [, offset]
+bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .setfp directive");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".setfp must precede .handlerdata directive");
+
+ // Parse fpreg
+ SMLoc NewFPRegLoc = Parser.getTok().getLoc();
+ int NewFPReg = tryParseRegister();
+ if (NewFPReg == -1)
+ return Error(NewFPRegLoc, "frame pointer register expected");
+
+ // Consume comma
+ if (!Parser.getTok().is(AsmToken::Comma))
+ return Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.Lex(); // skip comma
+
+ // Parse spreg
+ SMLoc NewSPRegLoc = Parser.getTok().getLoc();
+ int NewSPReg = tryParseRegister();
+ if (NewSPReg == -1)
+ return Error(NewSPRegLoc, "stack pointer register expected");
+
+ if (NewSPReg != ARM::SP && NewSPReg != FPReg)
+ return Error(NewSPRegLoc,
+ "register should be either $sp or the latest fp register");
+
+ // Update the frame pointer register
+ FPReg = NewFPReg;
+
+ // Parse offset
+ int64_t Offset = 0;
+ if (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // skip comma
+
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ return Error(Parser.getTok().getLoc(), "'#' expected");
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed setfp offset");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE)
+ return Error(ExLoc, "setfp offset must be an immediate");
+
+ Offset = CE->getValue();
+ }
+
+ getTargetStreamer().emitSetFP(static_cast<unsigned>(NewFPReg),
+ static_cast<unsigned>(NewSPReg), Offset);
+ return false;
+}
+
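For example (assuming the ARM-mode frame pointer r11; the values are illustrative):

    //   .setfp fp, sp, #8    @ unwinder note: fp was set to sp + 8
    // parses to NewFPReg = r11, NewSPReg = sp, Offset = 8 and emits:
    //   getTargetStreamer().emitSetFP(ARM::R11, ARM::SP, 8);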
+/// parseDirectivePad
+/// ::= .pad offset
+bool ARMAsmParser::parseDirectivePad(SMLoc L) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .pad directive");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".pad must precede .handlerdata directive");
+
+ // Parse the offset
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
+ return Error(Parser.getTok().getLoc(), "'#' expected");
+ }
+ Parser.Lex(); // skip hash token.
+
+ const MCExpr *OffsetExpr;
+ SMLoc ExLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed pad offset");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
+ if (!CE)
+ return Error(ExLoc, "pad offset must be an immediate");
+
+ getTargetStreamer().emitPad(CE->getValue());
+ return false;
+}
+
+/// parseDirectiveRegSave
+/// ::= .save { registers }
+/// ::= .vsave { registers }
+bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
+ // Check the ordering of unwind directives
+ if (!FnStartLoc.isValid())
+ return Error(L, ".fnstart must precede .save or .vsave directives");
+ if (HandlerDataLoc.isValid())
+ return Error(L, ".save or .vsave must precede .handlerdata directive");
+
+ // RAII object to make sure parsed operands are deleted.
+ struct CleanupObject {
+ SmallVector<MCParsedAsmOperand *, 1> Operands;
+ ~CleanupObject() {
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I)
+ delete Operands[I];
+ }
+ } CO;
+
+ // Parse the register list
+ if (parseRegisterList(CO.Operands))
+ return true;
+ ARMOperand *Op = (ARMOperand*)CO.Operands[0];
+ if (!IsVector && !Op->isRegList())
+ return Error(L, ".save expects GPR registers");
+ if (IsVector && !Op->isDPRRegList())
+ return Error(L, ".vsave expects DPR registers");
+
+ getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
+ return false;
}
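Usage, for reference (the register lists are illustrative):

    //   .save  {r4-r7, lr}   @ GPR list -> emitRegSave(Regs, /*IsVector=*/false)
    //   .vsave {d8-d15}      @ DPR list -> emitRegSave(Regs, /*IsVector=*/true)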
/// Force static initialization.
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ac937f35345e..9c7988f8a857 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -65,7 +65,7 @@ namespace {
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned CondBit0 = Firstcond & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
assert(NumTZ <= 3 && "Invalid IT mask!");
// push condition codes onto the stack in the correct order for the pops
@@ -156,12 +156,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -236,6 +241,14 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
@@ -268,6 +281,8 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
@@ -308,8 +323,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -332,6 +345,14 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
+static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder);
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
@@ -348,6 +369,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
@@ -402,7 +425,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
"Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 4, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -431,6 +454,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
+ result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
this, STI);
if (result != MCDisassembler::Fail) {
@@ -467,7 +497,22 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
+ result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+ MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -492,102 +537,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
bool isBranch, uint64_t InstSize,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
- struct LLVMOpInfo1 SymbolicOp;
- memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
- SymbolicOp.Value = Value;
- void *DisInfo = Dis->getDisInfoBlock();
-
- if (!getOpInfo ||
- !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- // Clear SymbolicOp.Value from above and also all other fields.
- memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
- LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
- if (!SymbolLookUp)
- return false;
- uint64_t ReferenceType;
- if (isBranch)
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- else
- ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
- const char *ReferenceName;
- uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
- const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
- Address, &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = true;
- }
- // For branches always create an MCExpr so it gets printed as hex address.
- else if (isBranch) {
- SymbolicOp.Value = Value;
- }
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
- if (!Name && !isBranch)
- return false;
- }
-
- MCContext *Ctx = Dis->getMCContext();
- const MCExpr *Add = NULL;
- if (SymbolicOp.AddSymbol.Present) {
- if (SymbolicOp.AddSymbol.Name) {
- StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Add = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Sub = NULL;
- if (SymbolicOp.SubtractSymbol.Present) {
- if (SymbolicOp.SubtractSymbol.Name) {
- StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Off = NULL;
- if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
- const MCExpr *Expr;
- if (Sub) {
- const MCExpr *LHS;
- if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
- else
- LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
- else
- Expr = LHS;
- } else if (Add) {
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
- else
- Expr = Add;
- } else {
- if (Off != 0)
- Expr = Off;
- else
- Expr = MCConstantExpr::Create(0, *Ctx);
- }
-
- if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
- MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
- else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
- MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- llvm_unreachable("bad SymbolicOp.VariantKind");
-
- return true;
+ // FIXME: Does it make sense for value to be negative?
+ return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
+ /* Offset */ 0, InstSize);
}
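
[Editor's note: after this refactor the symbolication boilerplate lives in the MCDisassembler base class, but clients still install their callbacks through the C disassembler API. A hedged sketch of the usual wiring follows; the callback body and triple are hypothetical, the API calls are from llvm-c/Disassembler.h.]

    #include "llvm-c/Disassembler.h"

    // Hypothetical lookup: resolve addresses against the client's symbol table.
    static const char *MySymbolLookup(void *DisInfo, uint64_t ReferenceValue,
                                      uint64_t *ReferenceType, uint64_t ReferencePC,
                                      const char **ReferenceName) {
      *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
      *ReferenceName = 0;
      return 0; // or a symbol name for ReferenceValue
    }

    LLVMDisasmContextRef DC =
        LLVMCreateDisasm("armv7-unknown-linux-gnueabi", /*DisInfo=*/0,
                         /*TagType=*/0, /*GetOpInfo=*/0, MySymbolLookup);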
/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
@@ -602,17 +554,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
- if (SymbolLookUp) {
- void *DisInfo = Dis->getDisInfoBlock();
- uint64_t ReferenceType;
- ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
- const char *ReferenceName;
- (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
- ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
- (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
- }
+ Dis->tryAddingPcLoadReferenceComment(Value, Address);
}
// Thumb1 instructions don't have explicit S bits. Rather, they
@@ -751,7 +693,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
- if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 2, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -803,7 +745,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ if (Region.readBytes(Address, 4, bytes) == -1) {
Size = 0;
return MCDisassembler::Fail;
}
@@ -832,23 +774,34 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return result;
}
- MI.clear();
- result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
- if (result != MCDisassembler::Fail) {
- Size = 4;
- UpdateThumbVFPPredicate(MI);
- return result;
+ if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ MI.clear();
+ result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ UpdateThumbVFPPredicate(MI);
+ return result;
+ }
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
- this, STI);
+ result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
return result;
}
+ if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ MI.clear();
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ Check(result, AddThumbPredicate(MI));
+ return result;
+ }
+ }
+
if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
MI.clear();
uint32_t NEONLdStInsn = insn32;
@@ -876,8 +829,31 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Check(result, AddThumbPredicate(MI));
return result;
}
+
+ MI.clear();
+ uint32_t NEONCryptoInsn = insn32;
+ NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ uint32_t NEONv8Insn = insn32;
+ NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
+ result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
}
+ MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -920,6 +896,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15) {
+ Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV));
+ return MCDisassembler::Success;
+ }
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
@@ -927,6 +918,26 @@ static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
+static const uint16_t GPRPairDecoderTable[] = {
+ ARM::R0_R1, ARM::R2_R3, ARM::R4_R5, ARM::R6_R7,
+ ARM::R8_R9, ARM::R10_R11, ARM::R12_SP
+};
+
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 13)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1) || RegNo == 0xe)
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::CreateReg(RegisterPair));
+ return S;
+}
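
[Editor's note: a worked example of the pair mapping above, under the table as added in this hunk. Indices into GPRPairDecoderTable are RegNo / 2; an odd RegNo is UNPREDICTABLE but still decodes to the enclosing pair as a soft failure.]

    // RegNo  4 -> GPRPairDecoderTable[2] == ARM::R4_R5   (Success)
    // RegNo  5 -> odd, UNPREDICTABLE: SoftFail, still adds ARM::R4_R5
    // RegNo 12 -> GPRPairDecoderTable[6] == ARM::R12_SP  (Success)
    // RegNo 14 -> rejected up front (RegNo > 13): Fail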
+
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
@@ -959,8 +970,11 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+ if (RegNo == 13 || RegNo == 15)
+ S = MCDisassembler::SoftFail;
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
}
static const uint16_t SPRDecoderTable[] = {
@@ -1030,7 +1044,7 @@ static const uint16_t QPRDecoderTable[] = {
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
+ if (RegNo > 31 || (RegNo & 1) != 0)
return MCDisassembler::Fail;
RegNo >>= 1;
@@ -1189,30 +1203,32 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- bool writebackLoad = false;
- unsigned writebackReg = 0;
+ bool NeedDisjointWriteback = false;
+ unsigned WritebackReg = 0;
switch (Inst.getOpcode()) {
- default:
- break;
- case ARM::LDMIA_UPD:
- case ARM::LDMDB_UPD:
- case ARM::LDMIB_UPD:
- case ARM::LDMDA_UPD:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
- writebackLoad = true;
- writebackReg = Inst.getOperand(0).getReg();
- break;
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ NeedDisjointWriteback = true;
+ WritebackReg = Inst.getOperand(0).getReg();
+ break;
}
// Empty register lists are not allowed.
- if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail;
+ if (Val == 0) return MCDisassembler::Fail;
for (unsigned i = 0; i < 16; ++i) {
if (Val & (1 << i)) {
if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
return MCDisassembler::Fail;
// Writeback not allowed if Rn is in the target list.
- if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
Check(S, MCDisassembler::SoftFail);
}
}
@@ -1227,6 +1243,13 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
unsigned Vd = fieldFromInstruction(Val, 8, 5);
unsigned regs = fieldFromInstruction(Val, 0, 8);
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max(1u, regs);
+ S = MCDisassembler::SoftFail;
+ }
+
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
for (unsigned i = 0; i < (regs - 1); ++i) {
@@ -1242,9 +1265,15 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
- unsigned regs = fieldFromInstruction(Val, 0, 8);
+ unsigned regs = fieldFromInstruction(Val, 1, 7);
- regs = regs >> 1;
+ // In case of unpredictable encoding, tweak the operands.
+ if (regs == 0 || regs > 16 || (Vd + regs) > 32) {
+ regs = Vd + regs > 32 ? 32 - Vd : regs;
+ regs = std::max(1u, regs);
+ regs = std::min(16u, regs);
+ S = MCDisassembler::SoftFail;
+ }
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
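
[Editor's note: a worked example of the clamping above, with illustrative values: for Vd = 30 and an encoded regs of 5, Vd + regs = 35 exceeds 32, so regs is clamped to 32 - Vd = 2, then bounded to [1, 16], and the decode is marked SoftFail.]

    unsigned Vd = 30, regs = 5;                // illustrative field values
    if (regs == 0 || regs > 16 || (Vd + regs) > 32) {
      regs = Vd + regs > 32 ? 32 - Vd : regs;  // 35 > 32, so regs = 2
      regs = std::max(1u, regs);               // still 2
      regs = std::min(16u, regs);              // still 2
      // S = MCDisassembler::SoftFail;  decode D30, D31 and flag UNPREDICTABLE
    }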
@@ -1334,6 +1363,11 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && (coproc != 14))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(coproc));
Inst.addOperand(MCOperand::CreateImm(CRd));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -1797,6 +1831,29 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address, const void *Decoder) {
@@ -1807,6 +1864,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned reglist = fieldFromInstruction(Insn, 0, 16);
if (pred == 0xF) {
+ // Ambiguous with RFE and SRS
switch (Inst.getOpcode()) {
case ARM::LDMDA:
Inst.setOpcode(ARM::RFEDA);
@@ -1857,11 +1915,16 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
Inst.setOpcode(ARM::SRSIB_UPD);
break;
default:
- if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail;
+ return MCDisassembler::Fail;
}
// For stores (which become SRSs), the only operand is the mode.
if (fieldFromInstruction(Insn, 20, 1) == 0) {
+ // Check SRS encoding constraints
+ if (!(fieldFromInstruction(Insn, 22, 1) == 1 &&
+ fieldFromInstruction(Insn, 20, 1) == 0))
+ return MCDisassembler::Fail;
+
Inst.addOperand(
MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
return S;
@@ -1891,6 +1954,13 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
+ // This decoder is called from multiple locations that do not check
+ // that the full encoding is valid before calling it.
+ if (fieldFromInstruction(Insn, 5, 1) != 0 ||
+ fieldFromInstruction(Insn, 16, 1) != 0 ||
+ fieldFromInstruction(Insn, 20, 8) != 0x10)
+ return MCDisassembler::Fail;
+
// imod == '01' --> UNPREDICTABLE
// NOTE: Even though this is technically UNPREDICTABLE, we choose to
// return failure here. The '01' imod value is unprintable, so there's
@@ -2106,7 +2176,7 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
- int imm32 = SignExtend32<24>(tmp << 1);
+ int imm32 = SignExtend32<25>(tmp << 1);
if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(imm32));
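
[Editor's note: the old SignExtend32<24> dropped the top bit of the branch offset. tmp packs S:I1:I2:imm10:imm11 into 24 bits, so after the << 1 the sign bit S sits at bit 24 and a 25-bit sign extension is required. A minimal check of the boundary case (a sketch):]

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    // S = 1, all other fields zero: tmp = 1 << 23, shifted value = 1 << 24.
    unsigned tmp = 1u << 23;
    assert(llvm::SignExtend32<25>(tmp << 1) == -(1 << 24)); // fixed: negative
    assert(llvm::SignExtend32<24>(tmp << 1) == 0);          // old code: sign lost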
@@ -2432,6 +2502,57 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 6 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 7 && (align & 2)) return MCDisassembler::Fail;
+ if (type == 10 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned type = fieldFromInstruction(Insn, 8, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (type == 8 && align == 3) return MCDisassembler::Fail;
+ if (type == 9 && align == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned align = fieldFromInstruction(Insn, 4, 2);
+ if (align & 2) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 3) return MCDisassembler::Fail;
+
+ unsigned load = fieldFromInstruction(Insn, 21, 1);
+ return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder)
+ : DecodeVSTInstruction(Inst, Insn, Address, Decoder);
+}
+
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3115,6 +3236,17 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
unsigned Rm = fieldFromInstruction(Val, 2, 4);
unsigned imm = fieldFromInstruction(Val, 0, 2);
+ // Thumb stores cannot use PC as the base register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRHs:
+ case ARM::t2STRBs:
+ case ARM::t2STRs:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -3128,53 +3260,282 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBs:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHs:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHs:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRs:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2PLDs:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIs:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHs:
+ return MCDisassembler::Fail;
+ case ARM::t2LDRHs:
+ // FIXME: this instruction is only available with MP extensions;
+ // that should be checked first, but we don't have access to the
+ // feature bits here.
+ Inst.setOpcode(ARM::t2PLDWs);
+ break;
+ default:
+ break;
+ }
+ }
+
switch (Inst.getOpcode()) {
case ARM::t2PLDs:
case ARM::t2PLDWs:
case ARM::t2PLIs:
break;
- default: {
- unsigned Rt = fieldFromInstruction(Insn, 12, 4);
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+ addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
+ if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned U = fieldFromInstruction(Insn, 9, 1);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ imm |= (U << 8);
+ imm |= (Rn << 9);
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRi8:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRBi8:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRSBi8:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRHi8:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHi8:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2PLDi8:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIi8:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHi8:
+ return MCDisassembler::Fail;
+ default:
+ break;
}
}
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDi8:
+ case ARM::t2PLIi8:
+ case ARM::t2PLDWi8:
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
- if (Rn == 0xF) {
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= (Rn << 13);
+
+ if (Rn == 15) {
switch (Inst.getOpcode()) {
- case ARM::t2LDRBs:
- Inst.setOpcode(ARM::t2LDRBpci);
- break;
- case ARM::t2LDRHs:
- Inst.setOpcode(ARM::t2LDRHpci);
- break;
- case ARM::t2LDRSHs:
- Inst.setOpcode(ARM::t2LDRSHpci);
- break;
- case ARM::t2LDRSBs:
- Inst.setOpcode(ARM::t2LDRSBpci);
+ case ARM::t2LDRi12:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRHi12:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSHi12:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ case ARM::t2LDRBi12:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRSBi12:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2PLDi12:
+ Inst.setOpcode(ARM::t2PLDpci);
+ break;
+ case ARM::t2PLIi12:
+ Inst.setOpcode(ARM::t2PLIpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRSHi12:
+ return MCDisassembler::Fail;
+ case ARM::t2LDRHi12:
+ Inst.setOpcode(ARM::t2PLDi12);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDi12:
+ case ARM::t2PLDWi12:
+ case ARM::t2PLIi12:
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ imm |= (Rn << 9);
+
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRT:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRBT:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRHT:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSBT:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRSHT:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
+
+static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void* Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ int imm = fieldFromInstruction(Insn, 0, 12);
+
+ if (Rt == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDRBpci:
+ case ARM::t2LDRHpci:
+ Inst.setOpcode(ARM::t2PLDpci);
break;
- case ARM::t2PLDs:
- Inst.setOpcode(ARM::t2PLDi12);
- Inst.addOperand(MCOperand::CreateReg(ARM::PC));
+ case ARM::t2LDRSBpci:
+ Inst.setOpcode(ARM::t2PLIpci);
break;
- default:
+ case ARM::t2LDRSHpci:
return MCDisassembler::Fail;
+ default:
+ break;
}
+ }
- int imm = fieldFromInstruction(Insn, 0, 12);
- if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm));
-
- return S;
+ switch (Inst.getOpcode()) {
+ case ARM::t2PLDpci:
+ case ARM::t2PLIpci:
+ break;
+ default:
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
}
- unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
- addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
- addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
- if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
- return MCDisassembler::Fail;
+ if (!U) {
+ // Special case for #-0.
+ if (imm == 0)
+ imm = INT32_MIN;
+ else
+ imm = -imm;
+ }
+ Inst.addOperand(MCOperand::CreateImm(imm));
return S;
}
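
[Editor's note: the label decoder above uses INT32_MIN as a sentinel so that a negative-zero offset (U = 0, imm = 0) survives to the printer, which renders it as "#-0"; INT32_MIN cannot be a real 12-bit offset, so the sentinel is unambiguous. A sketch of the round trip:]

    // Decoder side (as above):
    //   U == 0, imm == 0  -> operand value INT32_MIN
    //   U == 0, imm == 4  -> operand value -4
    //   U == 1, imm == 4  -> operand value  4
    // Printer side (printThumbLdrLabelOperand, later in this diff):
    //   INT32_MIN -> "[pc, #-0]",  -4 -> "[pc, #-4]",  4 -> "[pc, #4]"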
@@ -3243,6 +3604,21 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
unsigned Rn = fieldFromInstruction(Val, 9, 4);
unsigned imm = fieldFromInstruction(Val, 0, 9);
+ // Thumb stores cannot use PC as the base register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRT:
+ case ARM::t2STRBT:
+ case ARM::t2STRHT:
+ case ARM::t2STRi8:
+ case ARM::t2STRHi8:
+ case ARM::t2STRBi8:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
// Some instructions always use an additive offset.
switch (Inst.getOpcode()) {
case ARM::t2LDRT:
@@ -3278,6 +3654,37 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
addr |= Rn << 9;
unsigned load = fieldFromInstruction(Insn, 20, 1);
+ if (Rn == 15) {
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDR_PRE:
+ case ARM::t2LDR_POST:
+ Inst.setOpcode(ARM::t2LDRpci);
+ break;
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRB_POST:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ break;
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRH_POST:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ break;
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSB_POST:
+ if (Rt == 15)
+ Inst.setOpcode(ARM::t2PLIpci);
+ else
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ break;
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSH_POST:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ return DecodeT2LoadLabel(Inst, Insn, Address, Decoder);
+ }
+
if (!load) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3304,6 +3711,17 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
unsigned Rn = fieldFromInstruction(Val, 13, 4);
unsigned imm = fieldFromInstruction(Val, 0, 12);
+ // Thumb stores cannot use PC as the base register.
+ switch (Inst.getOpcode()) {
+ case ARM::t2STRi12:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi12:
+ if (Rn == 15)
+ return MCDisassembler::Fail;
+ break;
+ default:
+ break;
+ }
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3401,6 +3819,11 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && !(Val == 14 || Val == 15))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
@@ -3536,6 +3959,15 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val & ~0xf)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!Val) return MCDisassembler::Fail;
@@ -3551,11 +3983,10 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned pred = fieldFromInstruction(Insn, 28, 4);
- if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
+ if (Rn == 0xF)
+ S = MCDisassembler::SoftFail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3565,7 +3996,6 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
@@ -3578,12 +4008,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
- if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail;
+ if (Rn == 0xF || Rd == Rn || Rd == Rt || Rd == Rt+1)
+ S = MCDisassembler::SoftFail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder)))
+ if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4310,10 +4738,8 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
S = MCDisassembler::SoftFail;
}
- if (mask == 0x0) {
- mask |= 0x8;
- S = MCDisassembler::SoftFail;
- }
+ if (mask == 0x0)
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(pred));
Inst.addOperand(MCOperand::CreateImm(mask));
@@ -4453,16 +4879,18 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
unsigned imm = fieldFromInstruction(Insn, 16, 6);
unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
DecodeStatus S = MCDisassembler::Success;
// VMOVv2f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv2f32);
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
- if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4481,16 +4909,18 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
unsigned imm = fieldFromInstruction(Insn, 16, 6);
unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned op = fieldFromInstruction(Insn, 5, 1);
DecodeStatus S = MCDisassembler::Success;
// VMOVv4f32 is ambiguous with these decodings.
if (!(imm & 0x38) && cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
Inst.setOpcode(ARM::VMOVv4f32);
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
- if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+ if (!(imm & 0x20)) return MCDisassembler::Fail;
if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4501,15 +4931,6 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder)
-{
- unsigned Imm = fieldFromInstruction(Insn, 0, 3);
- if (Imm > 4) return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Imm));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 3bcd083a35fb..f89702853d41 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -76,14 +76,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+ switch (Opcode) {
// Check for HINT instructions w/ canonical names.
- if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ case ARM::HINT:
+ case ARM::tHINT:
+ case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
case 0: O << "\tnop"; break;
case 1: O << "\tyield"; break;
case 2: O << "\twfe"; break;
case 3: O << "\twfi"; break;
case 4: O << "\tsev"; break;
+ case 5:
+ if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ O << "\tsevl";
+ break;
+ } // Fallthrough for non-v8
default:
// Anything else should just print normally.
printInstruction(MI, O);
@@ -95,10 +104,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << ".w";
printAnnotation(O, Annot);
return;
- }
// Check for MOVs and print canonical forms, instead.
- if (Opcode == ARM::MOVsr) {
+ case ARM::MOVsr: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -121,7 +129,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (Opcode == ARM::MOVsi) {
+ case ARM::MOVsi: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -149,81 +157,91 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
-
// A8.6.123 PUSH
- if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print PUSH if there are at least two registers in the list.
- O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2STMDB_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(3).getImm() == -4) {
- O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
- O << "\t{";
- printRegName(O, MI->getOperand(1).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::STR_PRE_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.122 POP
- if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print POP if there are at least two registers in the list.
- O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2LDMIA_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(4).getImm() == 4) {
- O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
- O << "\t{";
- printRegName(O, MI->getOperand(0).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
-
+ case ARM::LDMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::LDR_POST_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.355 VPUSH
- if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.354 VPOP
- if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMDIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
- if (Opcode == ARM::tLDMIA) {
+ case ARM::tLDMIA: {
bool Writeback = true;
unsigned BaseReg = MI->getOperand(0).getReg();
for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
@@ -243,24 +261,16 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- // Thumb1 NOP
- if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 &&
- MI->getOperand(1).getReg() == ARM::R8) {
- O << "\tnop";
- printPredicateOperand(MI, 2, O);
- printAnnotation(O, Annot);
- return;
- }
-
// Combine 2 GPRs from disassembler into a GPRPair to match with instr def.
// ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
// a single GPRPair reg operand is used in the .td file to replace the two
// GPRs. However, when decoding them, the two GPRs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ case ARM::LDREXD: case ARM::STREXD:
+ case ARM::LDAEXD: case ARM::STLEXD:
const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
- bool isStore = Opcode == ARM::STREXD;
+ bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
MCInst NewMI;
@@ -315,15 +325,29 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- if (MO1.isExpr())
+ if (MO1.isExpr()) {
O << *MO1.getExpr();
- else if (MO1.isImm()) {
- O << markup("<mem:") << "[pc, "
- << markup("<imm:") << "#" << formatImm(MO1.getImm())
- << markup(">]>", "]");
+ return;
}
- else
- llvm_unreachable("Unknown LDR label operand?");
+
+ O << markup("<mem:") << "[pc, ";
+
+ int32_t OffImm = (int32_t)MO1.getImm();
+ bool isSub = OffImm < 0;
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub) {
+ O << markup("<imm:")
+ << "#-" << formatImm(-OffImm)
+ << markup(">");
+ } else {
+ O << markup("<imm:")
+ << "#" << formatImm(OffImm)
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
@@ -660,8 +684,8 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
- int32_t lsb = CountTrailingZeros_32(v);
- int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+ int32_t lsb = countTrailingZeros(v);
+ int32_t width = (32 - countLeadingZeros (v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
O << markup("<imm:") << '#' << lsb << markup(">")
<< ", "
@@ -671,7 +695,13 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val);
+ O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
+}
+
+void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_ISB::InstSyncBOptToString(val);
}
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
@@ -889,6 +919,7 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
+template<unsigned scale>
void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
@@ -898,7 +929,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
return;
}
- int32_t OffImm = (int32_t)MO.getImm();
+ int32_t OffImm = (int32_t)MO.getImm() << scale;
O << markup("<imm:");
if (OffImm == INT32_MIN)
@@ -931,7 +962,7 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
unsigned Mask = MI->getOperand(OpNum).getImm();
unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
unsigned CondBit0 = Firstcond & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
+ unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
bool T = ((Mask >> Pos) & 1) == CondBit0;
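
[Editor's note: a worked example of the mask walk above, with illustrative values. The number of trailing zeros fixes the IT block length, and each remaining bit prints 't' when it matches bit 0 of the first condition and 'e' otherwise.]

    unsigned Mask = 0x4;                         // 0b0100
    unsigned Firstcond = 0x0 /* EQ */;
    unsigned CondBit0 = Firstcond & 1;           // 0
    unsigned NumTZ = countTrailingZeros(Mask);   // 2
    // Pos runs from 3 down to NumTZ + 1, i.e. only Pos == 3 here:
    //   ((Mask >> 3) & 1) == CondBit0  ->  0 == 0  ->  't'
    // Printed mnemonic: "itt" (two conditional instructions, both EQ).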
@@ -1059,6 +1090,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
O << "]" << markup(">");
}
+template<bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -1069,22 +1101,25 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
// Don't print +0.
- if (OffImm != 0)
- O << ", ";
- if (OffImm != 0 && UseMarkup)
- O << "<imm:";
if (OffImm == INT32_MIN)
- O << "#-0";
- else if (OffImm < 0)
- O << "#-" << -OffImm;
- else if (OffImm > 0)
- O << "#" << OffImm;
- if (OffImm != 0 && UseMarkup)
- O << ">";
+ OffImm = 0;
+ if (isSub) {
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
+ }
O << "]" << markup(">");
}
+template<bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -1100,22 +1135,24 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
// Don't print +0.
- if (OffImm != 0)
- O << ", ";
- if (OffImm != 0 && UseMarkup)
- O << "<imm:";
if (OffImm == INT32_MIN)
- O << "#-0";
- else if (OffImm < 0)
- O << "#-" << -OffImm;
- else if (OffImm > 0)
- O << "#" << OffImm;
- if (OffImm != 0 && UseMarkup)
- O << ">";
+ OffImm = 0;
+ if (isSub) {
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
+ }
O << "]" << markup(">");
}
@@ -1142,7 +1179,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
O << ", " << markup("<imm:");
- if (OffImm < 0)
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
@@ -1157,19 +1196,14 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
- // Don't print +0.
- if (OffImm != 0)
- O << ", ";
- if (OffImm != 0 && UseMarkup)
- O << "<imm:";
+ O << ", " << markup("<imm:");
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
- else if (OffImm > 0)
+ else
O << "#" << OffImm;
- if (OffImm != 0 && UseMarkup)
- O << ">";
+ O << markup(">");
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 344104e87359..15ae8d1267f7 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -71,10 +71,12 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <unsigned scale>
void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -96,8 +98,10 @@ public:
template<bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ template<bool AlwaysPrintImm0>
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ template<bool AlwaysPrintImm0>
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 62473b2bfdee..b6c85c2e9466 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -140,7 +140,7 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned TZ = CountTrailingZeros_32(Imm);
+ unsigned TZ = countTrailingZeros(Imm);
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
// not 9.
@@ -153,7 +153,7 @@ namespace ARM_AM {
// For values like 0xF000000F, we should ignore the low 6 bits, then
// retry the hunt.
if (Imm & 63U) {
- unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned TZ2 = countTrailingZeros(Imm & ~63U);
unsigned RotAmt2 = TZ2 & ~1;
if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
return (32-RotAmt2)&31; // HW rotates right, not left.
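
[Editor's note: a worked example of the retry above, using the 0xF000000F value from the comment. The first even rotate derived from the trailing zeros fails, so the low six bits are masked off and the hunt is retried.]

    unsigned Imm = 0xF000000F;
    // First attempt: TZ = 0, RotAmt = 0, rotr32(Imm, 0) does not fit in 8 bits.
    // Retry, ignoring the low 6 bits:
    unsigned TZ2     = countTrailingZeros(Imm & ~63U);  // 28
    unsigned RotAmt2 = TZ2 & ~1;                        // 28 (must be even)
    // rotr32(0xF000000F, 28) == 0xFF fits in 8 bits, so the function
    // returns (32 - 28) & 31 == 4: 0xFF rotated right by 4 reproduces
    // 0xF000000F.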
@@ -221,7 +221,7 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return CountTrailingZeros_32(Imm);
+ return countTrailingZeros(Imm);
}
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
@@ -240,7 +240,7 @@ namespace ARM_AM {
if ((Imm & ~65535U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return CountTrailingZeros_32(Imm);
+ return countTrailingZeros(Imm);
}
/// isThumbImm16ShiftedVal - Return true if the specified value can be
@@ -296,7 +296,7 @@ namespace ARM_AM {
/// encoding is possible.
/// See ARM Reference Manual A6.3.2.
static inline int getT2SOImmValRotateVal(unsigned V) {
- unsigned RotAmt = CountLeadingZeros_32(V);
+ unsigned RotAmt = countLeadingZeros(V);
if (RotAmt >= 24)
return -1;
@@ -328,7 +328,7 @@ namespace ARM_AM {
static inline unsigned getT2SOImmValRotate(unsigned V) {
if ((V & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned RotAmt = CountTrailingZeros_32(V);
+ unsigned RotAmt = countTrailingZeros(V);
return (32 - RotAmt) & 31;
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index e66e98567873..5615b808fc11 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -152,7 +152,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
switch (Op) {
default: return Op;
case ARM::tBcc: return ARM::t2Bcc;
- case ARM::tLDRpciASM: return ARM::t2LDRpci;
+ case ARM::tLDRpci: return ARM::t2LDRpci;
case ARM::tADR: return ARM::t2ADR;
case ARM::tB: return ARM::t2B;
}
@@ -419,7 +419,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
uint32_t imm11Bits = (offset & 0x000007FF);
-
+
uint32_t Binary = 0;
uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
@@ -434,8 +434,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
// imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
// where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
- // The value is encoded into disjoint bit positions in the destination
- // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+ // The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
// J = either J1 or J2 bit, 0 = zero.
//
// BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
@@ -450,10 +450,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
uint32_t imm10LBits = (offset & 0x3FF);
-
+
uint32_t Binary = 0;
uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
((uint16_t)imm10LBits) << 1);
Binary |= secondHalf << 16;
Binary |= firstHalf;
@@ -640,16 +640,16 @@ public:
// FIXME: This should be in a separate file.
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
- const object::mach::CPUSubtypeARM Subtype;
+ const MachO::CPUSubTypeARM Subtype;
DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
+ MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT), Subtype(st) {
HasDataInCodeSupport = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
+ MachO::CPU_TYPE_ARM,
Subtype);
}
@@ -660,28 +660,33 @@ public:
} // end anonymous namespace
-MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- object::mach::CPUSubtypeARM CS =
- StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
- .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
- .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
- .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
- .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
- .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
- .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
- .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
- .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
- .Default(object::mach::CSARM_V7);
+ MachO::CPUSubTypeARM CS =
+ StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
+ .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
+ .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
+ .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
+ .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F)
+ .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
+ .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
+ .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
+ .Default(MachO::CPU_SUBTYPE_ARM_V7);
return new DarwinARMAsmBackend(T, TT, CS);
}
- if (TheTriple.isOSWindows())
+#if 0
+ // FIXME: Reinstate this check with a proper diagnostic instead of assert(0).
+ if (TheTriple.isOSBinFormatCOFF())
assert(0 && "Windows not supported on ARM");
+#endif
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
return new ELFARMAsmBackend(T, TT, OSABI);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index de48a0e0f30a..af939fc19129 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -121,46 +121,89 @@ namespace ARM_MB {
// the option field for memory barrier operations.
enum MemBOpt {
RESERVED_0 = 0,
- RESERVED_1 = 1,
+ OSHLD = 1,
OSHST = 2,
OSH = 3,
RESERVED_4 = 4,
- RESERVED_5 = 5,
+ NSHLD = 5,
NSHST = 6,
NSH = 7,
RESERVED_8 = 8,
- RESERVED_9 = 9,
+ ISHLD = 9,
ISHST = 10,
ISH = 11,
RESERVED_12 = 12,
- RESERVED_13 = 13,
+ LD = 13,
ST = 14,
SY = 15
};
- inline static const char *MemBOptToString(unsigned val) {
+ inline static const char *MemBOptToString(unsigned val, bool HasV8) {
switch (val) {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
- case RESERVED_13: return "#0xd";
+ case LD: return HasV8 ? "ld" : "#0xd";
case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
- case RESERVED_9: return "#0x9";
+ case ISHLD: return HasV8 ? "ishld" : "#0x9";
case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
- case RESERVED_5: return "#0x5";
+ case NSHLD: return HasV8 ? "nshld" : "#0x5";
case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
- case RESERVED_1: return "#0x1";
+ case OSHLD: return HasV8 ? "oshld" : "#0x1";
case RESERVED_0: return "#0x0";
}
}
} // namespace ARM_MB
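
[Editor's note: with the v8 additions above, the same 4-bit barrier option prints differently by target: on v8 the load-only barriers get mnemonics, while on earlier architectures they remain reserved and print as raw immediates. A short usage sketch, with enumerators as defined in this hunk:]

    ARM_MB::MemBOptToString(ARM_MB::LD,    /*HasV8=*/true);   // "ld"
    ARM_MB::MemBOptToString(ARM_MB::LD,    /*HasV8=*/false);  // "#0xd"
    ARM_MB::MemBOptToString(ARM_MB::ISHLD, /*HasV8=*/true);   // "ishld"
    ARM_MB::MemBOptToString(ARM_MB::SY,    /*HasV8=*/false);  // "sy"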
+namespace ARM_ISB {
+ enum InstSyncBOpt {
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ RESERVED_2 = 2,
+ RESERVED_3 = 3,
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ RESERVED_6 = 6,
+ RESERVED_7 = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ RESERVED_10 = 10,
+ RESERVED_11 = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ RESERVED_14 = 14,
+ SY = 15
+ };
+
+ inline static const char *InstSyncBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unkown memory operation");
+ case RESERVED_0: return "#0x0";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_2: return "#0x2";
+ case RESERVED_3: return "#0x3";
+ case RESERVED_4: return "#0x4";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_6: return "#0x6";
+ case RESERVED_7: return "#0x7";
+ case RESERVED_8: return "#0x8";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_10: return "#0xa";
+ case RESERVED_11: return "#0xb";
+ case RESERVED_12: return "#0xc";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_14: return "#0xe";
+ case SY: return "sy";
+ }
+ }
+} // namespace ARM_ISB
+
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6c3d2476688c..471897de5c1c 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,6 +13,8 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
#include "ARMUnwindOpAsm.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -36,7 +39,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
@@ -45,8 +50,218 @@ static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
}
+static const char *GetFPUName(unsigned ID) {
+ switch (ID) {
+ default:
+ llvm_unreachable("Unknown FPU kind");
+ break;
+#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
+#include "ARMFPUName.def"
+ }
+ return NULL;
+}
+
namespace {
+class ARMELFStreamer;
+
+class ARMTargetAsmStreamer : public ARMTargetStreamer {
+ formatted_raw_ostream &OS;
+ MCInstPrinter &InstPrinter;
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+public:
+ ARMTargetAsmStreamer(formatted_raw_ostream &OS, MCInstPrinter &InstPrinter);
+};
+
+ARMTargetAsmStreamer::ARMTargetAsmStreamer(formatted_raw_ostream &OS,
+ MCInstPrinter &InstPrinter)
+ : OS(OS), InstPrinter(InstPrinter) {}
+void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
+void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
+void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName() << '\n';
+}
+void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter.printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter.printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ OS << '\n';
+}
+void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset << '\n';
+}
+void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter.printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter.printRegName(OS, RegList[i]);
+ }
+
+ OS << "}\n";
+}
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
+}
+void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
+ case ARMBuildAttrs::CPU_name:
+ OS << "\t.cpu\t" << String.lower() << "\n";
+ break;
+ }
+}
+void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
+ OS << "\t.fpu\t" << GetFPUName(FPU) << "\n";
+}
+void ARMTargetAsmStreamer::finishAttributeSection() {
+}
+
+class ARMTargetELFStreamer : public ARMTargetStreamer {
+private:
+ // This structure holds all attributes, accounting for
+  // their string/numeric value, so we can later emit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItem {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+
+ static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
+ return (LHS.Tag < RHS.Tag);
+ }
+ };
+
+ StringRef CurrentVendor;
+ unsigned FPU;
+ SmallVector<AttributeItem, 64> Contents;
+
+ const MCSection *AttributeSection;
+
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ static size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+      Size += sizeof(int8_t); // one byte per 7 payload bits
+ } while (Value);
+ return Size;
+ }
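Editor's note: getULEBSize() charges one byte per started 7-bit group, which is the ULEB128 size rule. A standalone sketch with spot checks (hedged: assumes non-negative input, as ULEB128 encoding does):

#include <cassert>
#include <cstddef>

static size_t ulebSize(unsigned Value) {
  size_t Size = 0;
  do {
    Value >>= 7; // each ULEB128 byte carries 7 payload bits
    ++Size;
  } while (Value);
  return Size;
}

int main() {
  assert(ulebSize(0) == 1);      // 0x00
  assert(ulebSize(0x7f) == 1);   // 0x7f
  assert(ulebSize(0x80) == 2);   // 0x80 0x01
  assert(ulebSize(0x4000) == 3); // 0x80 0x80 0x01
}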
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return 0;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ Contents.push_back(Item);
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->StringValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::TextAttribute,
+ Attribute,
+ 0,
+ Value
+ };
+ Contents.push_back(Item);
+ }
+
+ void emitFPUDefaultAttributes();
+
+ ARMELFStreamer &getStreamer();
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+ size_t calculateContentSize() const;
+
+public:
+ ARMTargetELFStreamer()
+ : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
+ AttributeSection(0) {
+ }
+};
+
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
@@ -61,27 +276,29 @@ namespace {
/// by MachO. Beware!
class ARMELFStreamer : public MCELFStreamer {
public:
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ friend class ARMTargetELFStreamer;
+
+ ARMELFStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer,
+ MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool IsThumb)
+ : MCELFStreamer(Context, TargetStreamer, TAB, OS, Emitter),
IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
Reset();
}
~ARMELFStreamer() {}
+ virtual void FinishImpl();
+
// ARM exception handling directives
- virtual void EmitFnStart();
- virtual void EmitFnEnd();
- virtual void EmitCantUnwind();
- virtual void EmitPersonality(const MCSymbol *Per);
- virtual void EmitHandlerData();
- virtual void EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
- int64_t Offset = 0);
- virtual void EmitPad(int64_t Offset);
- virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector);
+ void emitFnStart();
+ void emitFnEnd();
+ void emitCantUnwind();
+ void emitPersonality(const MCSymbol *Per);
+ void emitHandlerData();
+ void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0);
+ void emitPad(int64_t Offset);
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
virtual void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
@@ -109,18 +326,17 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ virtual void EmitBytes(StringRef Data) {
EmitDataMappingSymbol();
- MCELFStreamer::EmitBytes(Data, AddrSpace);
+ MCELFStreamer::EmitBytes(Data);
}
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ MCELFStreamer::EmitValueImpl(Value, Size);
}
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
@@ -142,10 +358,6 @@ public:
}
}
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_ARMELFStreamer;
- }
-
private:
enum ElfMappingSymbol {
EMS_None,
@@ -184,7 +396,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -203,7 +415,8 @@ private:
void Reset();
void EmitPersonalityFixup(StringRef Name);
- void CollectUnwindOpcodes();
+ void FlushPendingOffset();
+ void FlushUnwindOpcodes(bool NoHandlerData);
void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
@@ -220,17 +433,236 @@ private:
MCSymbol *ExTab;
MCSymbol *FnStart;
const MCSymbol *Personality;
- uint32_t VFPRegSave; // Register mask for {d31-d0}
- uint32_t RegSave; // Register mask for {r15-r0}
- int64_t SPOffset;
- uint16_t FPReg;
- int64_t FPOffset;
+ unsigned PersonalityIndex;
+ unsigned FPReg; // Frame pointer register
+ int64_t FPOffset; // Offset: (final frame pointer) - (initial $sp)
+ int64_t SPOffset; // Offset: (final $sp) - (initial $sp)
+ int64_t PendingOffset; // Offset: (final $sp) - (emitted $sp)
bool UsedFP;
bool CantUnwind;
+ SmallVector<uint8_t, 64> Opcodes;
UnwindOpcodeAssembler UnwindOpAsm;
};
} // end anonymous namespace
+ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
+ ARMELFStreamer *S = static_cast<ARMELFStreamer *>(Streamer);
+ return *S;
+}
+
+void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
+void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
+void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
+ getStreamer().emitPersonality(Personality);
+}
+void ARMTargetELFStreamer::emitHandlerData() {
+ getStreamer().emitHandlerData();
+}
+void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ getStreamer().emitSetFP(FpReg, SpReg, Offset);
+}
+void ARMTargetELFStreamer::emitPad(int64_t Offset) {
+ getStreamer().emitPad(Offset);
+}
+void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ getStreamer().emitRegSave(RegList, isVector);
+}
+void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor == Vendor)
+ return;
+
+ if (!CurrentVendor.empty())
+ finishAttributeSection();
+
+ assert(Contents.empty() &&
+ ".ARM.attributes should be flushed before changing vendor");
+ CurrentVendor = Vendor;
+
+}
+void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitFPU(unsigned Value) {
+ FPU = Value;
+}
+void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+ switch (FPU) {
+ case ARM::VFP:
+ case ARM::VFPV2:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_FP_ARMV8:
+ case ARM::CRYPTO_NEON_FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8,
+ /* OverwriteExisting= */ false);
+ break;
+
+ default:
+ report_fatal_error("Unknown FPU: " + Twine(FPU));
+ break;
+ }
+}
+size_t ARMTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ switch (item.Type) {
+ case AttributeItem::HiddenAttribute:
+ break;
+ case AttributeItem::NumericAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += getULEBSize(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ }
+ }
+ return Result;
+}
+void ARMTargetELFStreamer::finishAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (FPU != ARM::INVALID_FPU)
+ emitFPUDefaultAttributes();
+
+ if (Contents.empty())
+ return;
+
+ std::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+
+ ARMELFStreamer &Streamer = getStreamer();
+
+ // Switch to .ARM.attributes section
+ if (AttributeSection) {
+ Streamer.SwitchSection(AttributeSection);
+ } else {
+ AttributeSection =
+ Streamer.getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version
+ Streamer.EmitIntValue(0x41, 1);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
+ case AttributeItem::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+ FPU = ARM::INVALID_FPU;
+}
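Editor's note: a worked check of the length arithmetic in finishAttributeSection() above, for the default "aeabi" vendor and a single numeric attribute. The attribute chosen (tag 6 = Tag_CPU_arch, value 10 = v7) is an assumption for illustration; the header sizes come from the code.

#include <cassert>
#include <cstddef>
#include <string>

int main() {
  const std::string Vendor = "aeabi";
  const size_t VendorHeaderSize = 4 + Vendor.size() + 1; // length word + name + '\0'
  const size_t TagHeaderSize = 1 + 4;                    // File tag byte + size word
  const size_t ContentsSize = 1 + 1;                     // ULEB(6) + ULEB(10)
  // Section bytes (little-endian words):
  // 41 | 11 00 00 00 | 61 65 61 62 69 00 | 01 | 07 00 00 00 | 06 0a
  assert(VendorHeaderSize + TagHeaderSize + ContentsSize == 17); // 0x11
  assert(TagHeaderSize + ContentsSize == 7);                     // 0x07
}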
+
+void ARMELFStreamer::FinishImpl() {
+ MCTargetStreamer &TS = getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.finishAttributeSection();
+
+ MCELFStreamer::FinishImpl();
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -279,81 +711,37 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::Reset() {
- const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-
ExTab = NULL;
FnStart = NULL;
Personality = NULL;
- VFPRegSave = 0;
- RegSave = 0;
- FPReg = MRI.getEncodingValue(ARM::SP);
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ FPReg = ARM::SP;
FPOffset = 0;
SPOffset = 0;
+ PendingOffset = 0;
UsedFP = false;
CantUnwind = false;
+ Opcodes.clear();
UnwindOpAsm.Reset();
}
-// Add the R_ARM_NONE fixup at the same position
-void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
- const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
-
- const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(PersonalitySym,
- MCSymbolRefExpr::VK_ARM_NONE,
- getContext());
-
- AddValueSymbols(PersonalityRef);
- MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(
- MCFixup::Create(DF->getContents().size(), PersonalityRef,
- MCFixup::getKindForSize(4, false)));
-}
-
-void ARMELFStreamer::CollectUnwindOpcodes() {
- if (UsedFP) {
- UnwindOpAsm.EmitSetFP(FPReg);
- UnwindOpAsm.EmitSPOffset(-FPOffset);
- } else {
- UnwindOpAsm.EmitSPOffset(SPOffset);
- }
- UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
- UnwindOpAsm.EmitRegSave(RegSave);
- UnwindOpAsm.Finalize();
-}
-
-void ARMELFStreamer::EmitFnStart() {
+void ARMELFStreamer::emitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
EmitLabel(FnStart);
}
-void ARMELFStreamer::EmitFnEnd() {
+void ARMELFStreamer::emitFnEnd() {
assert(FnStart && ".fnstart must preceeds .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
- if (!ExTab && !CantUnwind) {
- CollectUnwindOpcodes();
-
- unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
- if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
- PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
- // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
- // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
- // then emit a reference to these unwind opcodes in the second word of
- // the exception index table entry.
- SwitchToExTabSection(*FnStart);
- ExTab = getContext().CreateTempSymbol();
- EmitLabel(ExTab);
- EmitBytes(UnwindOpAsm.data(), 0);
- }
- }
+ if (!ExTab && !CantUnwind)
+ FlushUnwindOpcodes(true);
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
if (PersonalityIndex < NUM_PERSONALITY_INDEX)
EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
@@ -362,37 +750,80 @@ void ARMELFStreamer::EmitFnEnd() {
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
- EmitValue(FnStartRef, 4, 0);
+ EmitValue(FnStartRef, 4);
if (CantUnwind) {
- EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ EmitIntValue(EXIDX_CANTUNWIND, 4);
} else if (ExTab) {
// Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
MCSymbolRefExpr::Create(ExTab,
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
- EmitValue(ExTabEntryRef, 4, 0);
+ EmitValue(ExTabEntryRef, 4);
} else {
// For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
// the second word of exception index table entry. The size of the unwind
// opcodes should always be 4 bytes.
assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
"Compact model must use __aeabi_cpp_unwind_pr0 as personality");
- assert(UnwindOpAsm.size() == 4u &&
+ assert(Opcodes.size() == 4u &&
"Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
- EmitBytes(UnwindOpAsm.data(), 0);
+ EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()),
+ Opcodes.size()));
}
+ // Switch to the section containing FnStart
+ SwitchSection(&FnStart->getSection());
+
// Clean exception handling frame information
Reset();
}
-void ARMELFStreamer::EmitCantUnwind() {
- CantUnwind = true;
+void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
+ PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::FlushPendingOffset() {
+ if (PendingOffset != 0) {
+ UnwindOpAsm.EmitSPOffset(-PendingOffset);
+ PendingOffset = 0;
+ }
}
-void ARMELFStreamer::EmitHandlerData() {
+void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
+ // Emit the unwind opcode to restore $sp.
+ if (UsedFP) {
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ int64_t LastRegSaveSPOffset = SPOffset - PendingOffset;
+ UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset);
+ UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg));
+ } else {
+ FlushPendingOffset();
+ }
+
+ // Finalize the unwind opcode sequence
+ UnwindOpAsm.Finalize(PersonalityIndex, Opcodes);
+
+ // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx
+ // section. Thus, we don't have to create an entry in the .ARM.extab
+ // section.
+ if (NoHandlerData && PersonalityIndex == AEABI_UNWIND_CPP_PR0)
+ return;
+
+ // Switch to .ARM.extab section.
SwitchToExTabSection(*FnStart);
// Create .ARM.extab label for offset in .ARM.exidx
@@ -400,73 +831,117 @@ void ARMELFStreamer::EmitHandlerData() {
ExTab = getContext().CreateTempSymbol();
EmitLabel(ExTab);
- // Emit Personality
- assert(Personality && ".personality directive must preceed .handlerdata");
-
- const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(Personality,
- MCSymbolRefExpr::VK_ARM_PREL31,
- getContext());
+ // Emit personality
+ if (Personality) {
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
- EmitValue(PersonalityRef, 4, 0);
+ EmitValue(PersonalityRef, 4);
+ }
// Emit unwind opcodes
- CollectUnwindOpcodes();
- EmitBytes(UnwindOpAsm.data(), 0);
+ EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()),
+ Opcodes.size()));
+
+ // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or
+ // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted
+ // after the unwind opcodes. The handler data consists of several 32-bit
+ // words, and should be terminated by zero.
+ //
+ // In case that the .handlerdata directive is not specified by the
+ // programmer, we should emit zero to terminate the handler data.
+ if (NoHandlerData && !Personality)
+ EmitIntValue(0, 4);
}
-void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); }
+
+void ARMELFStreamer::emitPersonality(const MCSymbol *Per) {
Personality = Per;
UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
- unsigned NewSPReg,
+void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg,
int64_t Offset) {
- assert(SPOffset == 0 &&
- "Current implementation assumes .setfp precedes .pad");
-
- const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-
- uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
-#ifndef NDEBUG
- uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
-#endif
-
- assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+ assert((NewSPReg == ARM::SP || NewSPReg == FPReg) &&
"the operand of .setfp directive should be either $sp or $fp");
UsedFP = true;
- FPReg = NewFPRegEncVal;
- FPOffset = Offset;
-}
+ FPReg = NewFPReg;
-void ARMELFStreamer::EmitPad(int64_t Offset) {
- SPOffset += Offset;
+ if (NewSPReg == ARM::SP)
+ FPOffset = SPOffset + Offset;
+ else
+ FPOffset += Offset;
}
-void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool IsVector) {
- const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+void ARMELFStreamer::emitPad(int64_t Offset) {
+ // Track the change of the $sp offset
+ SPOffset -= Offset;
-#ifndef NDEBUG
- unsigned Max = IsVector ? 32 : 16;
-#endif
- uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+ // To squash multiple .pad directives, we should delay the unwind opcode
+ // until the .save, .vsave, .handlerdata, or .fnend directives.
+ PendingOffset -= Offset;
+}
+void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // Collect the registers in the register list
+ unsigned Count = 0;
+ uint32_t Mask = 0;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
for (size_t i = 0; i < RegList.size(); ++i) {
- unsigned Reg = MRI.getEncodingValue(RegList[i]);
- assert(Reg < Max && "Register encoded value out of range");
- RegMask |= 1u << Reg;
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ assert(Reg < (IsVector ? 32U : 16U) && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ if ((Mask & Bit) == 0) {
+ Mask |= Bit;
+ ++Count;
+ }
}
+
+ // Track the change the $sp offset: For the .save directive, the
+ // corresponding push instruction will decrease the $sp by (4 * Count).
+ // For the .vsave directive, the corresponding vpush instruction will
+ // decrease $sp by (8 * Count).
+ SPOffset -= Count * (IsVector ? 8 : 4);
+
+ // Emit the opcode
+ FlushPendingOffset();
+ if (IsVector)
+ UnwindOpAsm.EmitVFPRegSave(Mask);
+ else
+ UnwindOpAsm.EmitRegSave(Mask);
}
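Editor's note: a standalone sketch of the bookkeeping in emitRegSave() above; duplicates in the register list are counted once, and $sp drops by 4 bytes per core register (push) or 8 per D register (vpush). Register encodings are passed directly instead of going through MCRegisterInfo.

#include <cassert>
#include <cstdint>
#include <vector>

static int64_t regSaveSPDelta(const std::vector<unsigned> &EncodedRegs,
                              bool IsVector, uint32_t &Mask) {
  unsigned Count = 0;
  Mask = 0;
  for (unsigned Reg : EncodedRegs) {
    uint32_t Bit = 1u << Reg;
    if (!(Mask & Bit)) { // count each register once
      Mask |= Bit;
      ++Count;
    }
  }
  return -int64_t(Count) * (IsVector ? 8 : 4);
}

int main() {
  uint32_t Mask;
  // .save {r4, r5, r4}: the duplicate r4 is ignored, sp drops by 8.
  assert(regSaveSPDelta({4, 5, 4}, /*IsVector=*/false, Mask) == -8);
  assert(Mask == 0x30);
  // .vsave {d8}: one D register, sp drops by 8.
  assert(regSaveSPDelta({8}, /*IsVector=*/true, Mask) == -8);
}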
namespace llvm {
+
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+ ARMTargetAsmStreamer *S = new ARMTargetAsmStreamer(OS, *InstPrint);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
+}
+
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack,
bool IsThumb) {
- ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ ARMTargetELFStreamer *TS = new ARMTargetELFStreamer();
+ ARMELFStreamer *S =
+ new ARMELFStreamer(Context, TS, TAB, OS, Emitter, IsThumb);
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
deleted file mode 100644
index 77ae5d23628e..000000000000
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF streamer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_ELF_STREAMER_H
-#define ARM_ELF_STREAMER_H
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-
- MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack,
- bool IsThumb);
-}
-
-#endif // ARM_ELF_STREAMER_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index c1aab9c72cca..ad796e660e96 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -49,8 +49,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
- WeakRefDirective = "\t.weak\t";
-
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index f0b289c6f3b6..e1f716d936ad 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_ARMTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit ARMMCAsmInfoDarwin();
};
- class ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit ARMELFMCAsmInfo();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 7a59a7dd5055..4382d0d97144 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -58,8 +58,7 @@ public:
}
bool isTargetDarwin() const {
Triple TT(STI.getTargetTriple());
- Triple::OSType OS = TT.getOS();
- return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ return TT.isOSDarwin();
}
unsigned getMachineSoImmOpValue(unsigned SoImm) const;
@@ -315,6 +314,8 @@ public:
unsigned EncodedValue) const;
unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
unsigned EncodedValue) const;
+ unsigned NEONThumb2V8PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
unsigned VFPThumb2PostEncoder(const MCInst &MI,
unsigned EncodedValue) const;
@@ -389,6 +390,17 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
return EncodedValue;
}
+/// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form
+/// if we are in Thumb2.
+unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (isThumb2()) {
+ EncodedValue |= 0xC000000; // Set bits 27-26
+ }
+
+ return EncodedValue;
+}
+
/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
/// them to their Thumb2 form if we are currently in Thumb2 mode.
unsigned ARMMCCodeEmitter::
@@ -407,7 +419,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
// Q registers are encoded as 2x their register number.
switch (Reg) {
@@ -436,7 +448,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
int32_t SImm = MO1.getImm();
bool isAdd = true;
@@ -625,8 +637,14 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Val =
- ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ unsigned Val = 0;
+ const MCOperand MO = MI.getOperand(OpIdx);
+
+  if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ else
+ Val = MO.getImm() >> 1;
+
bool I = (Val & 0x800000);
bool J1 = (Val & 0x400000);
bool J2 = (Val & 0x200000);
@@ -652,7 +670,7 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
if (MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
Fixups);
- int32_t offset = MO.getImm();
+ int64_t offset = MO.getImm();
uint32_t Val = 0x2000;
int SoImmVal;
@@ -724,8 +742,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO1 = MI.getOperand(OpIdx);
const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
- unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO2.getReg());
return (Rm << 3) | Rn;
}
@@ -741,12 +759,12 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
Imm12 = 0;
- isAdd = false ; // 'U' bit is set as part of the fixup.
if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
+ isAdd = false ; // 'U' bit is set as part of the fixup.
MCFixupKind Kind;
if (isThumb2())
@@ -759,8 +777,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
- // FIXME: Handle #-0.
- if (Offset < 0) {
+ if (Offset == INT32_MIN) {
+ Offset = 0;
+ isAdd = false;
+ } else if (Offset < 0) {
Offset *= -1;
isAdd = false;
}
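Editor's note: the INT32_MIN case above is how the operand encodes "#-0": an immediate of zero with the U (add) bit clear. A standalone sketch of just that branch:

#include <cassert>
#include <climits>
#include <cstdint>

static void encodeImm12(int32_t Offset, bool &IsAdd, uint32_t &Imm12) {
  IsAdd = true;
  if (Offset == INT32_MIN) { // stands in for #-0
    Offset = 0;
    IsAdd = false;
  } else if (Offset < 0) {
    Offset *= -1;
    IsAdd = false;
  }
  Imm12 = uint32_t(Offset);
}

int main() {
  bool IsAdd;
  uint32_t Imm12;
  encodeImm12(INT32_MIN, IsAdd, Imm12); // e.g. ldr r0, [pc, #-0]
  assert(!IsAdd && Imm12 == 0);
  encodeImm12(-4, IsAdd, Imm12);        // e.g. ldr r0, [pc, #-4]
  assert(!IsAdd && Imm12 == 4);
}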
@@ -821,7 +841,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false ; // 'U' bit is set as part of the fixup.
@@ -857,7 +877,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// {7-0} = imm8
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
unsigned Imm8 = MO1.getImm();
return (Reg << 8) | Imm8;
}
@@ -940,8 +960,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
- unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
@@ -975,7 +995,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
// {12} isAdd
// {11-0} imm12/Rm
const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
Binary |= Rn << 14;
return Binary;
@@ -998,7 +1018,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
Binary <<= 7; // Shift amount is bits [11:7]
Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
- Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
+ Binary |= CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Rm is bits [3:0]
}
return Binary | (isAdd << 12) | (isReg << 13);
}
@@ -1011,7 +1031,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
bool isAdd = MO1.getImm() != 0;
- return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()) | (isAdd << 4);
}
uint32_t ARMMCCodeEmitter::
@@ -1029,7 +1049,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
return Imm8 | (isAdd << 8) | (isImm << 9);
}
@@ -1047,7 +1067,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
if (!MO.isReg()) {
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
@@ -1057,14 +1077,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
}
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
bool isImm = MO1.getReg() == 0;
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
}
@@ -1092,7 +1112,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
unsigned Imm5 = MO1.getImm();
return ((Imm5 & 0x1f) << 3) | Rn;
}
@@ -1119,7 +1139,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
// If The first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is handled as part of the fixup.
@@ -1165,7 +1185,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
// Encode Rm.
- unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1190,7 +1210,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the shift operation Rs.
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
+ return Binary | (CTX.getRegisterInfo()->getEncodingValue(Rs) << ARMII::RegRsShift);
}
unsigned ARMMCCodeEmitter::
@@ -1209,7 +1229,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1248,9 +1268,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
// Encoded as [Rn, Rm, imm].
// FIXME: Needs fixup support.
- unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
Value <<= 4;
- Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
+ Value |= CTX.getRegisterInfo()->getEncodingValue(MO2.getReg());
Value <<= 2;
Value |= MO3.getImm();
@@ -1264,7 +1284,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
- unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
// Even though the immediate is 8 bits long, we need 9 bits in order
// to represent the (inverse of the) sign bit.
@@ -1326,7 +1346,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1359,8 +1379,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
// msb of the mask.
const MCOperand &MO = MI.getOperand(Op);
uint32_t v = ~MO.getImm();
- uint32_t lsb = CountTrailingZeros_32(v);
- uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ uint32_t lsb = countTrailingZeros(v);
+ uint32_t msb = (32 - countLeadingZeros (v)) - 1;
assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
@@ -1382,7 +1402,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
if (SPRRegs || DPRRegs) {
// VLDM/VSTM
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
if (SPRRegs)
@@ -1391,7 +1411,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
@@ -1407,7 +1427,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1430,7 +1450,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1456,7 +1476,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1475,7 +1495,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index f09fb5a94fd8..a99de0e78230 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,30 +12,73 @@
//===----------------------------------------------------------------------===//
#include "ARMBaseInfo.h"
-#include "ARMELFStreamer.h"
#include "ARMMCAsmInfo.h"
#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
+static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV7Ops &&
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
+ (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
+ // Checks for the deprecated CP15ISB encoding:
+ // mcr p15, #0, rX, c7, c5, #4
+ (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
+ if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
+ Info = "deprecated since v7, use 'isb'";
+ return true;
+ }
+
+ // Checks for the deprecated CP15DSB encoding:
+ // mcr p15, #0, rX, c7, c10, #4
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
+ Info = "deprecated since v7, use 'dsb'";
+ return true;
+ }
+ }
+ // Checks for the deprecated CP15DMB encoding:
+ // mcr p15, #0, rX, c7, c10, #5
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
+ (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
+ Info = "deprecated since v7, use 'dmb'";
+ return true;
+ }
+ }
+ return false;
+}
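Editor's note: the three deprecated CP15 encodings checked above all use CRn = c7; only CRm and opc2 distinguish them. A standalone restatement with spot checks (the mapping is read off the comments in this hunk, not re-derived from the ARM ARM):

#include <cassert>
#include <cstring>

// mcr p15, #0, rX, c7, <CRm>, <opc2>
static const char *deprecatedCP15Barrier(unsigned CRm, unsigned Opc2) {
  if (CRm == 5 && Opc2 == 4)
    return "isb"; // CP15ISB: mcr p15, #0, rX, c7, c5, #4
  if (CRm == 10 && Opc2 == 4)
    return "dsb"; // CP15DSB: mcr p15, #0, rX, c7, c10, #4
  if (CRm == 10 && Opc2 == 5)
    return "dmb"; // CP15DMB: mcr p15, #0, rX, c7, c10, #5
  return nullptr; // not a deprecated barrier encoding
}

int main() {
  assert(!std::strcmp(deprecatedCP15Barrier(5, 4), "isb"));
  assert(!std::strcmp(deprecatedCP15Barrier(10, 4), "dsb"));
  assert(!std::strcmp(deprecatedCP15Barrier(10, 5), "dmb"));
  assert(deprecatedCP15Barrier(5, 0) == nullptr);
}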
+
+static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
+ MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is deprecated";
+ return true;
+ }
+
+ return false;
+}
+
#define GET_INSTRINFO_MC_DESC
#include "ARMGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "ARMGenSubtargetInfo.inc"
-using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
Triple triple(TT);
@@ -59,8 +102,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
std::string ARMArchFeature;
if (Idx) {
unsigned SubVer = TT[Idx];
- if (SubVer >= '7' && SubVer <= '9') {
+ if (SubVer == '8') {
+ if (NoCPU)
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP,
+ // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8";
+ } else if (SubVer == '7') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ isThumb = true;
if (NoCPU)
// v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
@@ -99,9 +151,10 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
ARMArchFeature = "+v6t2";
else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ isThumb = true;
if (NoCPU)
// v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6,+noarm,+mclass";
+ ARMArchFeature = "+v6m,+noarm,+mclass";
else
ARMArchFeature = "+v6";
} else
@@ -159,7 +212,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
return X;
}
-static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
@@ -212,6 +265,15 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
return 0;
}
+static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT,
+ MCContext &Ctx) {
+ Triple TheTriple(TT);
+ if (TheTriple.isEnvironmentMachO())
+ return createARMMachORelocationInfo(Ctx);
+ // Default to the stock relocation info.
+ return llvm::createMCRelocationInfo(TT, Ctx);
+}
+
namespace {
class ARMMCInstrAnalysis : public MCInstrAnalysis {
@@ -232,15 +294,16 @@ public:
return MCInstrAnalysis::isConditionalBranch(Inst);
}
- uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
// We only handle PCRel branches for now.
if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(0).getImm();
// FIXME: This is not right for thumb.
- return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+ return true;
}
};
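Editor's note: the evaluateBranch() change above returns the target through an out-parameter instead of overloading -1 as a failure value. A standalone sketch of the ARM-state arithmetic (as the FIXME notes, Thumb would use a different PC bias):

#include <cassert>
#include <cstdint>

static bool evaluateARMBranch(int64_t Imm, uint64_t Addr, uint64_t &Target) {
  Target = Addr + Imm + 8; // in ARM state the PC reads as instruction address + 8
  return true;
}

int main() {
  uint64_t Target;
  // "b ." at 0x1000 encodes an immediate of -8 and branches to itself.
  assert(evaluateARMBranch(-8, 0x1000, Target) && Target == 0x1000);
}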
@@ -292,7 +355,17 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheARMTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheThumbTarget, createMCAsmStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(TheARMTarget,
+ createARMMCRelocationInfo);
+ TargetRegistry::RegisterMCRelocationInfo(TheThumbTarget,
+ createARMMCRelocationInfo);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index a89981e4f060..959be8b55c3a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -18,13 +18,17 @@
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCInstPrinter;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCStreamer;
+class MCRelocationInfo;
class StringRef;
class Target;
class raw_ostream;
@@ -41,12 +45,19 @@ namespace ARM_MC {
StringRef FS);
}
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst);
+
MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createARMELFObjectWriter - Construct an ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
@@ -58,6 +69,9 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
+MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
new file mode 100644
index 000000000000..807c9483bc38
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -0,0 +1,43 @@
+//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm-c/Disassembler.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class ARMMachORelocationInfo : public MCRelocationInfo {
+public:
+ ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
+
+ const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr,
+ unsigned VariantKind) {
+ switch(VariantKind) {
+ case LLVMDisassembler_VariantKind_ARM_HI16:
+ return ARMMCExpr::CreateUpper16(SubExpr, Ctx);
+ case LLVMDisassembler_VariantKind_ARM_LO16:
+ return ARMMCExpr::CreateLower16(SubExpr, Ctx);
+ default:
+ return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr,
+ VariantKind);
+ }
+ }
+};
+} // End unnamed namespace
+
+/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
+MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
+ return new ARMMachORelocationInfo(Ctx);
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b9efe74b41e5..1f681bac2242 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -20,10 +20,9 @@
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
-using namespace llvm::object;
namespace {
class ARMMachObjectWriter : public MCMachObjectTargetWriter {
@@ -63,7 +62,7 @@ public:
static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
unsigned &Log2Size) {
- RelocType = unsigned(macho::RIT_Vanilla);
+ RelocType = unsigned(MachO::ARM_RELOC_VANILLA);
Log2Size = ~0U;
switch (Kind) {
@@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_blx:
- RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ RelocType = unsigned(MachO::ARM_RELOC_BR24);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
return true;
// Handle Thumb branches.
case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(2);
return true;
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(4);
return true;
@@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
// 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 1;
return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 0;
return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 2;
return true;
}
@@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_ARM_Half;
+ unsigned Type = MachO::ARM_RELOC_HALF;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_ARM_HalfDifference;
+ Type = MachO::ARM_RELOC_HALF_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -223,29 +222,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
break;
}
- if (Type == macho::RIT_ARM_HalfDifference) {
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
- macho::RelocationEntry MRE;
- MRE.Word0 = ((OtherHalf << 0) |
- (macho::RIT_Pair << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((OtherHalf << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
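
The HALF/PAIR pairing above can be seen in isolation: the relocated instruction carries one 16-bit half of the final value, and the PAIR entry written first records the other half in its r_word0. A small standalone sketch of that split (the resolved value is hypothetical):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t FixedValue = 0x2001f5f0; // hypothetical resolved address
      bool MovtBit = true;              // true when relocating the movt half
      // Same selection as the OtherHalf computation above.
      uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff)
                                   : ((FixedValue & 0xffff0000) >> 16);
      printf("half stored in the PAIR entry: 0x%04x\n", (unsigned)OtherHalf);
      return 0;
    }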
@@ -259,7 +258,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
+ unsigned Type = MachO::ARM_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -284,31 +283,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_Difference;
+ Type = MachO::ARM_RELOC_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ if (Type == MachO::ARM_RELOC_SECTDIFF ||
+ Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
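
For reference, the r_word0 packing used by both scattered paths follows the scattered-entry layout from <mach-o/reloc.h>: a 24-bit address, a 4-bit type, a 2-bit length, a PC-relative bit, and R_SCATTERED in bit 31. A compilable sketch of the packing (field values are illustrative):

    #include <cassert>
    #include <cstdint>

    static const uint32_t R_SCATTERED = 0x80000000u; // bit 31 marks a scattered entry

    uint32_t packScatteredWord0(uint32_t Addr, uint32_t Type, uint32_t Log2Size,
                                uint32_t IsPCRel) {
      assert(Addr < (1u << 24) && "scattered r_address is only 24 bits");
      return (Addr << 0) | (Type << 24) | (Log2Size << 28) |
             (IsPCRel << 30) | R_SCATTERED;
    }

    int main() {
      uint32_t W = packScatteredWord0(0x1234, /*type*/2, /*log2size*/2, /*pcrel*/0);
      assert((W & 0xffffffu) == 0x1234);  // r_address
      assert(((W >> 24) & 0xf) == 2);     // r_type
      assert(((W >> 28) & 0x3) == 2);     // r_length
      assert(W & R_SCATTERED);
      return 0;
    }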
@@ -326,13 +325,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
switch (RelocType) {
default:
return false;
- case macho::RIT_ARM_Branch24Bit:
+ case MachO::ARM_RELOC_BR24:
// PC pre-adjustment of 8 for these instructions.
Value -= 8;
// ARM BL/BLX has a 25-bit offset.
Range = 0x1ffffff;
break;
- case macho::RIT_ARM_ThumbBranch22Bit:
+ case MachO::ARM_THUMB_RELOC_BR22:
// PC pre-adjustment of 4 for these instructions.
Value -= 4;
// Thumb BL/BLX has a 24-bit offset.
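
These two cases set up the displacement check that decides whether an external relocation is required. The exact comparison is elided by the hunk, so the boundary handling in this sketch is an assumption:

    #include <cstdint>

    // Hedged sketch: after the PC pre-adjustment, a branch displacement that
    // does not fit the encodable range forces an external relocation.
    bool exceedsBranchRange(int64_t Value, bool IsThumb) {
      int64_t Range = IsThumb ? 0xffffff   // Thumb BL/BLX: 24-bit offset
                              : 0x1ffffff; // ARM BL/BLX: 25-bit offset
      Value -= IsThumb ? 4 : 8;            // PC pre-adjustment per the cases above
      return Value > Range || Value < -(Range + 1);
    }

    int main() {
      // A 64 MiB ARM BL displacement is out of range.
      return exceedsBranchRange(0x4000000, /*IsThumb=*/false) ? 0 : 1;
    }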
@@ -361,7 +360,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
- unsigned RelocType = macho::RIT_Vanilla;
+ unsigned RelocType = MachO::ARM_RELOC_VANILLA;
if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
@@ -374,7 +373,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half)
+ if (RelocType == MachO::ARM_RELOC_HALF)
return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -392,7 +391,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
//
// Is this right for ARM?
uint32_t Offset = Target.getConstant();
- if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
Offset += 1 << Log2Size;
if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -445,17 +444,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
- if (Type == macho::RIT_ARM_Half) {
+ if (Type == MachO::ARM_RELOC_HALF) {
// The other-half value only gets populated for the movt and movw
// relocation entries.
uint32_t Value = 0;
@@ -474,11 +473,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Value = FixedValue & 0xffff;
break;
}
- macho::RelocationEntry MREPair;
- MREPair.Word0 = Value;
- MREPair.Word1 = ((0xffffff) |
- (Log2Size << 25) |
- (macho::RIT_Pair << 28));
+ MachO::any_relocation_info MREPair;
+ MREPair.r_word0 = Value;
+ MREPair.r_word1 = ((0xffffff << 0) |
+ (Log2Size << 25) |
+ (MachO::ARM_RELOC_PAIR << 28));
Writer->addRelocation(Fragment->getParent(), MREPair);
}
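
The non-scattered entry and its movw/movt PAIR follow struct relocation_info: r_word0 holds the fixup address, and r_word1 packs a 24-bit symbol/section index, the PC-relative bit, a 2-bit length, the extern bit, and a 4-bit type; the PAIR uses the sentinel index 0xffffff. A standalone sketch of the r_word1 packing (values illustrative):

    #include <cassert>
    #include <cstdint>

    uint32_t packWord1(uint32_t Index, uint32_t IsPCRel, uint32_t Log2Size,
                       uint32_t IsExtern, uint32_t Type) {
      return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
             (IsExtern << 27) | (Type << 28);
    }

    int main() {
      // A movw/movt PAIR entry, as built above: index 0xffffff, type ARM_RELOC_PAIR.
      uint32_t W = packWord1(0xffffff, 0, /*log2size*/2, 0, /*ARM_RELOC_PAIR*/1);
      assert((W & 0xffffffu) == 0xffffff);
      assert(((W >> 28) & 0xf) == 1);
      return 0;
    }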
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 191db69fbc2b..c94337081884 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -20,6 +20,48 @@
using namespace llvm;
+namespace {
+ /// UnwindOpcodeStreamer - A simple wrapper over SmallVector that emits
+ /// bytes in MSB-to-LSB order within each uint32_t word. For example, the
+ /// first byte is placed in Vec[3], and the following bytes at indices
+ /// 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, and so on.
+ class UnwindOpcodeStreamer {
+ private:
+ SmallVectorImpl<uint8_t> &Vec;
+ size_t Pos;
+
+ public:
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
+ }
+
+ /// Emit a byte, filling each uint32_t word from MSB to LSB.
+ inline void EmitByte(uint8_t elem) {
+ Vec[Pos] = elem;
+ Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
+ }
+
+ /// Emit the size prefix.
+ inline void EmitSize(size_t Size) {
+ size_t SizeInWords = (Size + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ EmitByte(static_cast<uint8_t>(SizeInWords - 1));
+ }
+
+ /// Emit the personality index prefix.
+ inline void EmitPersonalityIndex(unsigned PI) {
+ assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+ EmitByte(EHT_COMPACT | PI);
+ }
+
+ /// Fill the remaining bytes with the FINISH opcode.
+ inline void FillFinishOpcode() {
+ while (Pos < Vec.size())
+ EmitByte(UNWIND_OPCODE_FINISH);
+ }
+ };
+}
+
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
return;
@@ -43,28 +85,22 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
if (UnmaskedReg == 0u) {
// Pop r[4 : (4 + n)]
- Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
RegSave &= 0x000fu;
} else if (UnmaskedReg == (1u << 14)) {
// Pop r[14] + r[4 : (4 + n)]
- Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
RegSave &= 0x000fu;
}
}
// Two bytes opcode to save register r15-r4
- if ((RegSave & 0xfff0u) != 0) {
- uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
- }
+ if ((RegSave & 0xfff0u) != 0)
+ EmitInt16(UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4));
// Opcode to save register r3-r0
- if ((RegSave & 0x000fu) != 0) {
- uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
- }
+ if ((RegSave & 0x000fu) != 0)
+ EmitInt16(UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu));
}
/// Emit unwind opcodes for .vsave directives
@@ -89,10 +125,8 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
Bit >>= 1;
}
- uint32_t Op =
- UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
+ ((i - 16) << 4) | Range);
}
while (i > 0) {
@@ -113,86 +147,75 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
Bit >>= 1;
}
- uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
- Ops.push_back(static_cast<uint8_t>(Op >> 8));
- Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range);
}
}
-/// Emit unwind opcodes for .setfp directives
-void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
- Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+/// Emit unwind opcodes to set $sp to the address held in the source register.
+void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) {
+ EmitInt8(UNWIND_OPCODE_SET_VSP | Reg);
}
-/// Emit unwind opcodes to update stack pointer
+/// Emit unwind opcodes to add an offset to $sp.
void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
if (Offset > 0x200) {
- uint8_t Buff[10];
- size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
- Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
- Ops.append(Buff, Buff + Size);
+ uint8_t Buff[16];
+ Buff[0] = UNWIND_OPCODE_INC_VSP_ULEB128;
+ size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1);
+ EmitBytes(Buff, ULEBSize + 1);
} else if (Offset > 0) {
if (Offset > 0x100) {
- Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+ EmitInt8(UNWIND_OPCODE_INC_VSP | 0x3fu);
Offset -= 0x100;
}
- Ops.push_back(UNWIND_OPCODE_INC_VSP |
- static_cast<uint8_t>((Offset - 4) >> 2));
+ EmitInt8(UNWIND_OPCODE_INC_VSP | static_cast<uint8_t>((Offset - 4) >> 2));
} else if (Offset < 0) {
while (Offset < -0x100) {
- Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ EmitInt8(UNWIND_OPCODE_DEC_VSP | 0x3fu);
Offset += 0x100;
}
- Ops.push_back(UNWIND_OPCODE_DEC_VSP |
- static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ EmitInt8(UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
}
}
-void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
- size_t SizeInWords = (size() + 3) / 4;
- assert(SizeInWords <= 0x100u &&
- "Only 256 additional words are allowed for unwind opcodes");
- Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
-}
+void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result) {
-void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
- assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
- Ops[Pos] = EHT_COMPACT | PI;
-}
+ UnwindOpcodeStreamer OpStreamer(Result);
-void UnwindOpcodeAssembler::EmitFinishOpcodes() {
- for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
- Ops.push_back(UNWIND_OPCODE_FINISH);
-}
-
-void UnwindOpcodeAssembler::Finalize() {
if (HasPersonality) {
- // Personality specified by .personality directive
- Offset = 1;
- AddOpcodeSizePrefix(1);
+ // User-specified personality routine: [ SIZE , OP1 , OP2 , ... ]
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ size_t TotalSize = Ops.size() + 1;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitSize(RoundUpSize);
} else {
- if (getOpcodeSize() <= 3) {
+ if (Ops.size() <= 3) {
// __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
- Offset = 1;
PersonalityIndex = AEABI_UNWIND_CPP_PR0;
- AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ Result.resize(4);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
} else {
// __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
- Offset = 0;
PersonalityIndex = AEABI_UNWIND_CPP_PR1;
- AddPersonalityIndexPrefix(Offset, PersonalityIndex);
- AddOpcodeSizePrefix(1);
+ size_t TotalSize = Ops.size() + 2;
+ size_t RoundUpSize = (TotalSize + 3) / 4 * 4;
+ Result.resize(RoundUpSize);
+ OpStreamer.EmitPersonalityIndex(PersonalityIndex);
+ OpStreamer.EmitSize(RoundUpSize);
}
}
- // Emit the padding finish opcodes if the size() is not multiple of 4.
- EmitFinishOpcodes();
+ // Copy the unwind opcodes
+ for (size_t i = OpBegins.size() - 1; i > 0; --i)
+ for (size_t j = OpBegins[i - 1], end = OpBegins[i]; j < end; ++j)
+ OpStreamer.EmitByte(Ops[j]);
- // Swap the byte order
- uint8_t *Ptr = Ops.begin() + Offset;
- assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
- for (size_t i = 0, n = size(); i < n; i += 4) {
- std::swap(Ptr[i], Ptr[i + 3]);
- std::swap(Ptr[i + 1], Ptr[i + 2]);
- }
+ // Emit the padding finish opcodes if the size is not a multiple of 4.
+ OpStreamer.FillFinishOpcode();
+
+ // Reset the assembler state
+ Reset();
}
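
The streamer's EmitByte cursor is worth tracing once: Pos = ((Pos ^ 0x3u) + 1) ^ 0x3u walks 3, 2, 1, 0, 7, 6, 5, 4, ... so that bytes land MSB-first within each 32-bit word, as the unwind table format expects. A standalone trace (not the LLVM class itself):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      size_t Pos = 3;                 // same starting cursor as the streamer
      std::vector<uint8_t> Vec(8, 0);
      for (uint8_t Byte = 1; Byte <= 8; ++Byte) {
        Vec[Pos] = Byte;
        Pos = ((Pos ^ 0x3u) + 1) ^ 0x3u; // same update as EmitByte
      }
      // Prints "4 3 2 1 8 7 6 5": emission order is big-endian within each word.
      for (uint8_t B : Vec) printf("%u ", (unsigned)B);
      printf("\n");
      return 0;
    }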
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index f6ecaeb8b29f..ac67c6efabb7 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -27,86 +27,61 @@ class MCSymbol;
class UnwindOpcodeAssembler {
private:
- llvm::SmallVector<uint8_t, 8> Ops;
-
- unsigned Offset;
- unsigned PersonalityIndex;
+ llvm::SmallVector<uint8_t, 32> Ops;
+ llvm::SmallVector<unsigned, 8> OpBegins;
bool HasPersonality;
- enum {
- // The number of bytes to be preserved for the size and personality index
- // prefix of unwind opcodes.
- NUM_PRESERVED_PREFIX_BUF = 2
- };
-
public:
UnwindOpcodeAssembler()
- : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
- PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) {
+ : HasPersonality(0) {
+ OpBegins.push_back(0);
}
/// Reset the unwind opcode assembler.
void Reset() {
- Ops.resize(NUM_PRESERVED_PREFIX_BUF);
- Offset = NUM_PRESERVED_PREFIX_BUF;
- PersonalityIndex = NUM_PERSONALITY_INDEX;
+ Ops.clear();
+ OpBegins.clear();
+ OpBegins.push_back(0);
HasPersonality = 0;
}
- /// Get the size of the payload (including the size byte)
- size_t size() const {
- return Ops.size() - Offset;
- }
-
- /// Get the beginning of the payload
- const uint8_t *begin() const {
- return Ops.begin() + Offset;
- }
-
- /// Get the payload
- StringRef data() const {
- return StringRef(reinterpret_cast<const char *>(begin()), size());
- }
-
/// Set the personality index
void setPersonality(const MCSymbol *Per) {
HasPersonality = 1;
}
- /// Get the personality index
- unsigned getPersonalityIndex() const {
- return PersonalityIndex;
- }
-
/// Emit unwind opcodes for .save directives
void EmitRegSave(uint32_t RegSave);
/// Emit unwind opcodes for .vsave directives
void EmitVFPRegSave(uint32_t VFPRegSave);
- /// Emit unwind opcodes for .setfp directives
- void EmitSetFP(uint16_t FPReg);
+ /// Emit unwind opcodes to set $sp to the address held in the source register.
+ void EmitSetSP(uint16_t Reg);
- /// Emit unwind opcodes to update stack pointer
+ /// Emit unwind opcodes to add an offset to $sp.
void EmitSPOffset(int64_t Offset);
/// Finalize the unwind opcode sequence for EmitBytes()
- void Finalize();
+ void Finalize(unsigned &PersonalityIndex,
+ SmallVectorImpl<uint8_t> &Result);
private:
- /// Get the size of the opcodes in bytes.
- size_t getOpcodeSize() const {
- return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+ void EmitInt8(unsigned Opcode) {
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 1);
}
- /// Add the length prefix to the payload
- void AddOpcodeSizePrefix(size_t Pos);
-
- /// Add personality index prefix in some compact format
- void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+ void EmitInt16(unsigned Opcode) {
+ Ops.push_back((Opcode >> 8) & 0xff);
+ Ops.push_back(Opcode & 0xff);
+ OpBegins.push_back(OpBegins.back() + 2);
+ }
- /// Fill the words with finish opcode if it is not aligned
- void EmitFinishOpcodes();
+ void EmitBytes(const uint8_t *Opcode, size_t Size) {
+ Ops.insert(Ops.end(), Opcode, Opcode + Size);
+ OpBegins.push_back(OpBegins.back() + Size);
+ }
};
} // namespace llvm
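
The OpBegins side table exists so Finalize can replay whole opcodes in reverse emission order without scrambling the bytes inside a multi-byte opcode. A standalone sketch of that bookkeeping (the opcode values are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint8_t> Ops;
      std::vector<unsigned> OpBegins{0};
      auto EmitInt8 = [&](unsigned Op) {
        Ops.push_back(Op & 0xff);
        OpBegins.push_back(OpBegins.back() + 1);
      };
      auto EmitInt16 = [&](unsigned Op) {
        Ops.push_back((Op >> 8) & 0xff);
        Ops.push_back(Op & 0xff);
        OpBegins.push_back(OpBegins.back() + 2);
      };
      EmitInt8(0x9d);    // a one-byte opcode
      EmitInt16(0x8480); // a two-byte opcode
      // Reverse-order copy, as in Finalize(): prints "84 80 9d", i.e. the last
      // opcode comes first but its internal byte order is preserved.
      for (size_t i = OpBegins.size() - 1; i > 0; --i)
        for (size_t j = OpBegins[i - 1], e = OpBegins[i]; j < e; ++j)
          printf("%02x ", (unsigned)Ops[j]);
      printf("\n");
      return 0;
    }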
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 1e2a8b03e1a0..cfb33f5b8212 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
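
getArgRegsSaveSize now takes the stack alignment, presumably so the saved-argument area (r0-r3 pushed for varargs) is rounded up to it. A hedged sketch of that rounding, with an illustrative helper name:

    #include <cassert>

    // Hypothetical helper: round a raw byte size up to a power-of-two
    // alignment, as getArgRegsSaveSize(Align) presumably does internally.
    unsigned roundUpToAlignment(unsigned RawSize, unsigned Align) {
      assert(Align && (Align & (Align - 1)) == 0 &&
             "alignment must be a power of two");
      return (RawSize + Align - 1) & ~(Align - 1);
    }

    int main() {
      assert(roundUpToAlignment(12, 8) == 16); // e.g. r1-r3 saved, 8-byte stack
      return 0;
    }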
@@ -126,7 +127,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
@@ -135,16 +135,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
DPRCSSize += 8;
}
}
@@ -168,10 +164,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
+ int FramePtrOffsetInBlock = 0;
+ if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
+ FramePtrOffsetInBlock = NumBytes;
+ NumBytes = 0;
+ }
+
// Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
+ FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
@@ -212,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
@@ -249,7 +245,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -294,8 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else
+ if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}
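
The FramePtrOffsetInBlock / 4 in the prologue reflects tADDrSPi's encoding: the Thumb1 ADD rd, sp, #imm instruction scales an 8-bit immediate by 4, so the offset must be word-aligned and at most 1020. A small sketch of that constraint (helper name illustrative):

    #include <cassert>

    // tADDrSPi encodes imm8 * 4, hence the division by 4 in the prologue.
    unsigned encodeTAddRSPiImm(unsigned ByteOffset) {
      assert(ByteOffset % 4 == 0 && ByteOffset / 4 <= 0xff &&
             "offset must be word-aligned and fit an 8-bit field");
      return ByteOffset / 4;
    }

    int main() {
      assert(encodeTAddRSPiImm(16) == 4);
      return 0;
    }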
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 095736d52a88..22a925e0ffbc 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 7452fb776ebd..65a7221d5db7 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -40,9 +40,8 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
const TargetRegisterClass*
@@ -70,6 +69,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -426,7 +426,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
*this);
} else {
// Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = -imm (this is then translated into a series of instructions)
// r0 = add r0, sp
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
@@ -488,6 +488,9 @@ void
Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
unsigned BaseReg, int64_t Offset) const {
MachineInstr &MI = *I;
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(
+ MI.getParent()->getParent()->getTarget().getInstrInfo());
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -513,6 +516,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// off the frame pointer (if, for example, there are alloca() calls in
// the function), the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
+ const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -558,6 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -567,11 +573,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
"Unexpected");
// There are alloca()'s in this function, must reference off the frame
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
index ebbab36dd7b8..9689b23146a0 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb1RegisterInfo(const ARMSubtarget &STI);
const TargetRegisterClass*
getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 97c254ce75a5..0b7d3bb7754f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+ bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
@@ -73,15 +74,15 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
- Uses.insert(Reg);
- for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
Uses.insert(*Subreg);
}
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
- Defs.insert(Reg);
- for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
continue;
@@ -192,37 +193,42 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // Branches, including tricky ones like LDM_RET, need to end an IT
- // block so check the instruction we just put in the block.
- for (; MBBI != E && Pos &&
- (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- MI = NMI;
-
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC) {
- Mask |= (NCC & 1) << Pos;
- // Add implicit use of ITSTATE.
- NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
- true/*isImp*/, false/*isKill*/));
- LastITMI = NMI;
- } else {
- if (NCC == ARMCC::AL &&
- MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
- --MBBI;
- MBB.remove(NMI);
- MBB.insert(InsertPos, NMI);
- ++NumMovedInsts;
+
+ // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // is set; when restricted, skip the loop below that would extend the block.
+ if (!restrictIT) {
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
}
- break;
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
}
- TrackDefUses(NMI, Defs, Uses, TRI);
- --Pos;
}
// Finalize IT mask.
@@ -250,6 +256,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
+ restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
if (!AFI->isThumbFunction())
return false;
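
For context on the Mask being built here: in the architectural IT encoding, bit 3 describes the first slot after the leading instruction, each slot is 'then' when its bit matches firstcond[0] and 'else' otherwise, and the lowest set bit terminates the block (the "Finalize IT mask." step above presumably supplies that terminator). A standalone decoder for that scheme:

    #include <cstdio>

    // Decode an IT mask into its T/E suffix, per the architectural encoding.
    void printITSuffix(unsigned FirstCondLSB, unsigned Mask) {
      printf("IT");
      for (int Pos = 3; Pos >= 1 && (Mask & ((1u << Pos) - 1)); --Pos)
        printf("%c", (((Mask >> Pos) & 1) == FirstCondLSB) ? 'T' : 'E');
      printf("\n");
    }

    int main() {
      printITSuffix(/*EQ*/0, 0x8); // "IT"   (no extra slots)
      printITSuffix(/*EQ*/0, 0x6); // "ITTE" (then, else, terminator at bit 1)
      return 0;
    }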
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index a1b48c226aa8..91788ac4f893 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -31,12 +31,13 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(*this, STI) {
+ : ARMBaseInstrInfo(STI), RI(STI) {
}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- NopInst.setOpcode(ARM::tNOP);
+ NopInst.setOpcode(ARM::tHINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
}
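
The replacement NOP is expressed as the generic Thumb hint instruction with immediate 0; architecturally, hint 0 is NOP (1 = YIELD, 2 = WFE, 3 = WFI, 4 = SEV), which is why the MCInst above carries Imm(0). A trivial mapping for reference:

    #include <cstdio>

    // The Thumb hint space selected by tHINT's immediate operand.
    const char *hintName(unsigned Imm) {
      switch (Imm) {
      case 0: return "nop";
      case 1: return "yield";
      case 2: return "wfe";
      case 3: return "wfi";
      case 4: return "sev";
      default: return "hint";
      }
    }

    int main() { printf("%s\n", hintName(0)); return 0; }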
@@ -214,6 +215,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -285,7 +293,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
@@ -302,7 +310,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
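
The countLeadingZeros-based splitting deserves one worked step: the rotation aligns an 8-bit window with the most significant set bit, so each iteration peels off a chunk expressible as a Thumb2 modified immediate. A standalone trace (uses the GCC/Clang __builtin_clz for brevity):

    #include <cstdint>
    #include <cstdio>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
    }

    int main() {
      uint32_t NumBytes = 0x12345678;
      unsigned RotAmt = __builtin_clz(NumBytes);           // 3 for this value
      uint32_t ThisVal = NumBytes & rotr32(0xff000000u, RotAmt);
      // Prints chunk=0x12200000 remainder=0x00145678: the top 8 significant
      // bits go into one t2ADDri/t2SUBri, the rest into later iterations.
      printf("chunk=0x%08x remainder=0x%08x\n",
             (unsigned)ThisVal, (unsigned)(NumBytes & ~ThisVal));
      return 0;
    }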
@@ -334,6 +342,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi12: return ARM::t2STRi8;
case ARM::t2STRBi12: return ARM::t2STRBi8;
case ARM::t2STRHi12: return ARM::t2STRHi8;
+ case ARM::t2PLDi12: return ARM::t2PLDi8;
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
@@ -343,6 +352,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
@@ -364,6 +374,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi8: return ARM::t2STRi12;
case ARM::t2STRBi8: return ARM::t2STRBi12;
case ARM::t2STRHi8: return ARM::t2STRHi12;
+ case ARM::t2PLDi8: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -373,6 +384,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
return opcode;
default:
@@ -394,6 +406,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRs: return ARM::t2STRi12;
case ARM::t2STRBs: return ARM::t2STRBi12;
case ARM::t2STRHs: return ARM::t2STRHi12;
+ case ARM::t2PLDs: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -403,6 +416,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
case ARM::t2LDRBi8:
@@ -411,6 +425,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
@@ -484,7 +499,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, extract 8 adjacent bits from the immediate into this
// t2ADDri/t2SUBri.
- unsigned RotAmt = CountLeadingZeros_32(Offset);
+ unsigned RotAmt = countLeadingZeros<unsigned>(Offset);
unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
// We will handle these bits from offset, clear them.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 1a7a4d450cfe..4cb827f308ef 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -24,9 +24,8 @@
#include "llvm/IR/Function.h"
using namespace llvm;
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
- const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(tii, sti) {
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
+ : ARMBaseRegisterInfo(sti) {
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -40,6 +39,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
index 6b397e869683..b1d63fa86d01 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -20,12 +20,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
+
+class ARMSubtarget;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ Thumb2RegisterInfo(const ARMSubtarget &STI);
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.