author    Dimitry Andric <dim@FreeBSD.org>  2017-01-02 21:25:48 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-01-02 21:25:48 +0000
commit    d88c1a5a572cdb661c111098831fa526e933756f (patch)
tree      97b32c3372106ac47ded3d1a99f9c023a8530073 /contrib/llvm/lib/Target/ARM
parent    715652a404ee99f10c09c0a5edbb5883961b8c25 (diff)
parent    b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)

Update llvm to trunk r290819 and resolve conflicts.

Notes: svn path=/projects/clang400-import/; revision=311142
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--  contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.h | 5
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td | 84
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 247
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h | 30
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 91
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 16
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 63
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h | 110
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp | 203
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallLowering.h | 42
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.td | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp | 72
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 295
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp | 33
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 31
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 51
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 333
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 362
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 990
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 21
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrFormats.td | 17
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.td | 57
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrNEON.td | 14
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb.td | 105
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td | 525
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 109
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h | 39
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 44
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h | 29
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 14
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp | 113
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 25
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 127
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h | 41
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSchedule.td | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMScheduleR52.td | 983
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp | 71
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 44
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp | 105
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp | 45
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h | 17
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 21
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 17
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1384
-rw-r--r--  contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 14
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 58
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 29
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 21
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 20
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 34
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 323
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 16
69 files changed, 5669 insertions, 1980 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 9228cc2d7a9c..89859ba063d9 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -52,9 +52,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
- return "ARM A15 S->D optimizer";
- }
+ StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
private:
const ARMBaseInstrInfo *TII;
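
The only change to this file is mechanical: the import picks up LLVM's change of Pass::getPassName() to return StringRef rather than const char *, and every pass touched by this diff is updated the same way. A minimal sketch of the new override shape (illustration only, not code from the commit):

#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
// Post-change signature: a string literal still works because StringRef is
// implicitly constructible from const char *.
struct ExamplePass : public llvm::MachineFunctionPass {
  static char ID;
  ExamplePass() : llvm::MachineFunctionPass(ID) {}
  llvm::StringRef getPassName() const override { return "Example pass"; }
  bool runOnMachineFunction(llvm::MachineFunction &) override { return false; }
};
char ExamplePass::ID = 0;
} // end anonymous namespace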
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 690ff86a0c86..be3048252bbc 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
+#include "ARMBasicBlockInfo.h"
#include <functional>
namespace llvm {
@@ -46,6 +47,10 @@ FunctionPass *createThumb2SizeReductionPass(
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
+void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
+ BasicBlockInfo &BBI);
+std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
+
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
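
The two declarations above expose the block-size computation that this import factors out into the new ARMComputeBlockSize.cpp (see the diffstat), so the constant-island pass and other users can share it. A hedged sketch of a caller, assuming only these declarations plus the BasicBlockInfo fields defined in ARMBasicBlockInfo.h further down in this diff:

#include "ARM.h"
#include "ARMBasicBlockInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <vector>

// Illustration only: sum of per-block sizes for a whole function.
// BasicBlockInfo::Size deliberately excludes alignment padding.
static unsigned functionCodeBytes(llvm::MachineFunction &MF) {
  std::vector<llvm::BasicBlockInfo> BBInfo = llvm::computeAllBlockSizes(&MF);
  unsigned Bytes = 0;
  for (const llvm::BasicBlockInfo &BBI : BBInfo)
    Bytes += BBI.Size;
  return Bytes;
}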
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef626b66a1e7..2a090faeee6a 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
// Not to be confused with FeatureHasRetAddrStack (return address stack)
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability and Serviceability extensions">;
+def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
+ "Enable fast computation of positive address offsets">;
// Cyclone has preferred instructions for zeroing VFP registers, which can
@@ -295,7 +297,8 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
FeatureV7Clrex]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease]>;
+ [HasV7Ops, FeatureAcquireRelease,
+ FeatureT2XtPk]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
@@ -352,6 +355,8 @@ def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors", []>;
def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
"Cortex-R7 ARM processors", []>;
+def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
+ "Cortex-R52 ARM processors", []>;
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
@@ -388,7 +393,8 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
- FeatureDSP]>;
+ FeatureDSP,
+ FeatureT2XtPk]>;
def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
@@ -409,13 +415,15 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass]>;
+ FeatureAClass,
+ FeatureT2XtPk]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
FeatureHWDiv,
- FeatureRClass]>;
+ FeatureRClass,
+ FeatureT2XtPk]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
@@ -470,6 +478,19 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
FeatureCRC,
FeatureRAS]>;
+def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
+ FeatureRClass,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureDSP,
+ FeatureCRC,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureFPARMv8,
+ FeatureNEON]>;
+
def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
@@ -570,7 +591,6 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureMP,
FeatureVFP4]>;
@@ -581,7 +601,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureMP,
FeatureVFP4,
FeatureHWDiv,
@@ -595,15 +614,13 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
FeatureSlowFPBrcc,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ FeatureVMLxForwarding]>;
def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureHasVMLxHazards,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureExpandMLx,
@@ -618,7 +635,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureVFP4,
FeatureHWDiv,
FeatureHWDivARM,
@@ -632,7 +648,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
FeatureTrustZone,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureCheckVLDnAlign,
@@ -647,7 +662,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureTrustZone,
FeatureMP,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureVFP4,
FeatureHWDiv,
FeatureHWDivARM,
@@ -662,7 +676,6 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureMuxedUnits,
FeatureCheckVLDnAlign,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
@@ -672,7 +685,6 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureHWDiv,
@@ -691,8 +703,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
// FIXME: R4 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
@@ -701,8 +712,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasSlowFPVMLx,
FeatureVFP3,
FeatureD16,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
@@ -712,8 +722,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
@@ -725,8 +734,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasRetAddrStack,
@@ -737,8 +745,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
@@ -755,42 +762,38 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
@@ -798,7 +801,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureHWDiv,
@@ -812,10 +814,24 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
+ FeatureFPAO]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
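
The net effect of the FeatureT2XtPk churn above: the feature is now implied by the architecture definitions (ARMv6T2 and the v7/v8 A- and R-profile entries) instead of being repeated in each CPU's feature list, alongside the new FeatureFPAO, ARMv8-R, Cortex-R52, and exynos-m2/m3 definitions. Backend code keeps querying the feature through the subtarget; a sketch, with the accessor name inferred from the usual SubtargetFeature naming convention (an assumption, verify against ARMSubtarget.h):

#include "ARMSubtarget.h"

// Assumption: FeatureT2XtPk toggles a HasT2ExtractPack bool on ARMSubtarget,
// following the SubtargetFeature<"t2xtpk", "HasT2ExtractPack", ...> pattern.
static bool canFormExtractPack(const llvm::ARMSubtarget &STI) {
  // After this change, true by default on any v6t2-or-later A/R-class
  // target, with no per-CPU FeatureT2XtPk entry required.
  return STI.hasT2ExtractPack();
}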
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 04863a7ecf8f..f20768ab77a5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -74,8 +74,9 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
OutStreamer->EmitAssemblerFlag(MCAF_Code16);
OutStreamer->EmitThumbFunc(CurrentFnSym);
+ } else {
+ OutStreamer->EmitAssemblerFlag(MCAF_Code32);
}
-
OutStreamer->EmitLabel(CurrentFnSym);
}
@@ -96,6 +97,13 @@ void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
OutStreamer->EmitValue(E, Size);
}
+void ARMAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (PromotedGlobals.count(GV))
+ // The global was promoted into a constant pool. It should not be emitted.
+ return;
+ AsmPrinter::EmitGlobalVariable(GV);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -108,6 +116,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function* F = MF.getFunction();
const TargetMachine& TM = MF.getTarget();
+ // Collect all globals that had their storage promoted to a constant pool.
+ // Functions are emitted before variables, so this accumulates promoted
+ // globals from all functions in PromotedGlobals.
+ for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
+ PromotedGlobals.insert(GV);
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F->hasFnAttribute(Attribute::OptimizeNone))
@@ -150,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // Emit the XRay table for this function.
+ EmitXRayTable();
+
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
@@ -215,6 +232,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
break;
}
case MachineOperand::MO_ConstantPoolIndex:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
GetCPISymbol(MO.getIndex())->print(O, MAI);
break;
}
@@ -249,7 +268,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
<< "]";
return false;
}
- // Fallthrough
+ LLVM_FALLTHROUGH;
case 'c': // Don't print "#" before an immediate operand.
if (!MI->getOperand(OpNum).isImm())
return true;
@@ -542,11 +561,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
raw_string_ostream OS(Flags);
for (const auto &Function : M)
- TLOF.emitLinkerFlagsForGlobal(OS, &Function, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Function);
for (const auto &Global : M.globals())
- TLOF.emitLinkerFlagsForGlobal(OS, &Global, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Global);
for (const auto &Alias : M.aliases())
- TLOF.emitLinkerFlagsForGlobal(OS, &Alias, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Alias);
OS.flush();
@@ -588,9 +607,11 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
if (CPU == "xscale")
return ARMBuildAttrs::v5TEJ;
- if (Subtarget->hasV8Ops())
+ if (Subtarget->hasV8Ops()) {
+ if (Subtarget->isRClass())
+ return ARMBuildAttrs::v8_R;
return ARMBuildAttrs::v8_A;
- else if (Subtarget->hasV8MMainlineOps())
+ } else if (Subtarget->hasV8MMainlineOps())
return ARMBuildAttrs::v8_M_Main;
else if (Subtarget->hasV7Ops()) {
if (Subtarget->isMClass() && Subtarget->hasDSP())
@@ -614,6 +635,15 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
return ARMBuildAttrs::v4;
}
+// Returns true if all functions have the same function attribute value.
+// It also returns true when the module has no functions.
+static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
+ StringRef Value) {
+ return !any_of(M, [&](const Function &F) {
+ return F.getFnAttribute(Attr).getValueAsString() != Value;
+ });
+}
+
void ARMAsmPrinter::emitAttributes() {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -725,31 +755,48 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitFPU(ARM::FK_VFPV2);
}
+ // RW data addressing.
if (isPositionIndependent()) {
- // PIC specific attributes.
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
ARMBuildAttrs::AddressRWPCRel);
+ } else if (STI.isRWPI()) {
+ // RWPI specific attributes.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+ ARMBuildAttrs::AddressRWSBRel);
+ }
+
+ // RO data addressing.
+ if (isPositionIndependent() || STI.isROPI()) {
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
ARMBuildAttrs::AddressROPCRel);
+ }
+
+ // GOT use.
+ if (isPositionIndependent()) {
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
ARMBuildAttrs::AddressGOT);
} else {
- // Allow direct addressing of imported data for all other relocation models.
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
ARMBuildAttrs::AddressDirect);
}
- // Signal various FP modes.
- if (!TM.Options.UnsafeFPMath) {
+ // Set FP Denormals.
+ if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "denormal-fp-math",
+ "preserve-sign") ||
+ TM.Options.FPDenormalMode == FPDenormal::PreserveSign)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ else if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "denormal-fp-math",
+ "positive-zero") ||
+ TM.Options.FPDenormalMode == FPDenormal::PositiveZero)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PositiveZero);
+ else if (!TM.Options.UnsafeFPMath)
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::IEEEDenormals);
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
-
- // If the user has permitted this code to choose the IEEE 754
- // rounding at run-time, emit the rounding attribute.
- if (TM.Options.HonorSignDependentRoundingFPMathOption)
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
- } else {
+ else {
if (!STI.hasVFP2()) {
// When the target doesn't have an FPU (by design or
// intention), the assumptions made on the software support
@@ -775,6 +822,21 @@ void ARMAsmPrinter::emitAttributes() {
// absence of its emission implies zero).
}
+ // Set FP exceptions and rounding
+ if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "no-trapping-math", "true") ||
+ TM.Options.NoTrappingFPMath)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Not_Allowed);
+ else if (!TM.Options.UnsafeFPMath) {
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
+
+ // If the user has permitted this code to choose the IEEE 754
+ // rounding at run-time, emit the rounding attribute.
+ if (TM.Options.HonorSignDependentRoundingFPMathOption)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
+ }
+
// TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
// equivalent of GCC's -ffinite-math-only flag.
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
@@ -858,14 +920,16 @@ void ARMAsmPrinter::emitAttributes() {
}
}
- // TODO: We currently only support either reserving the register, or treating
- // it as another callee-saved register, but not as SB or a TLS pointer; It
- // would instead be nicer to push this from the frontend as metadata, as we do
- // for the wchar and enum size tags
- if (STI.isR9Reserved())
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
+ // We currently do not support using R9 as the TLS pointer.
+ if (STI.isRWPI())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsSB);
+ else if (STI.isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9Reserved);
else
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsGPR);
if (STI.hasTrustZone() && STI.hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
@@ -880,7 +944,7 @@ void ARMAsmPrinter::emitAttributes() {
//===----------------------------------------------------------------------===//
-static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
unsigned LabelId, MCContext &Ctx) {
MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
@@ -899,6 +963,8 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
return MCSymbolRefExpr::VK_TPOFF;
case ARMCP::GOTTPOFF:
return MCSymbolRefExpr::VK_GOTTPOFF;
+ case ARMCP::SBREL:
+ return MCSymbolRefExpr::VK_ARM_SBREL;
case ARMCP::GOT_PREL:
return MCSymbolRefExpr::VK_ARM_GOT_PREL;
case ARMCP::SECREL:
@@ -954,6 +1020,26 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+ if (ACPV->isPromotedGlobal()) {
+ // This constant pool entry is actually a global whose storage has been
+ // promoted into the constant pool. This global may be referenced still
+ // by debug information, and due to the way AsmPrinter is set up, the debug
+ // info is immutable by the time we decide to promote globals to constant
+ // pools. Because of this, we need to ensure we emit a symbol for the global
+ // with private linkage (the default) so debug info can refer to it.
+ //
+ // However, if this global is promoted into several functions we must ensure
+ // we don't try and emit duplicate symbols!
+ auto *ACPC = cast<ARMConstantPoolConstant>(ACPV);
+ auto *GV = ACPC->getPromotedGlobal();
+ if (!EmittedPromotedGlobalLabels.count(GV)) {
+ MCSymbol *GVSym = getSymbol(GV);
+ OutStreamer->EmitLabel(GVSym);
+ EmittedPromotedGlobalLabels.insert(GV);
+ }
+ return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit());
+ }
+
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
MCSym = getCurExceptionSym();
@@ -973,7 +1059,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSym = MBB->getSymbol();
} else {
assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ auto Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
MCSym = GetExternalSymbolSymbol(Sym);
}
@@ -1037,7 +1123,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
// .word (LBB1 - LJTI_0_0)
const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- if (isPositionIndependent())
+ if (isPositionIndependent() || Subtarget->isROPI())
Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
OutContext),
OutContext);
@@ -1082,6 +1168,9 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ if (Subtarget->isThumb1Only())
+ EmitAlignment(2);
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1628,6 +1717,91 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
+ case ARM::tTBB_JT:
+ case ARM::tTBH_JT: {
+
+ bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Idx = MI->getOperand(1).getReg();
+ assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
+
+ // Multiply up idx if necessary.
+ if (!Is8Bit)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ if (Base == ARM::PC) {
+ // TBB [base, idx] =
+ // ADDS idx, idx, base
+ // LDRB idx, [idx, #4] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ // When using PC as the base, it's important that there is no padding
+ // between the last ADDS and the start of the jump table. The jump table
+ // is 4-byte aligned, so we ensure we're 4 byte aligned here too.
+ //
+ // FIXME: Ideally we could vary the LDRB index based on the padding
+ // between the sequence and jump table, however that relies on MCExprs
+ // for load indexes which are currently not supported.
+ OutStreamer->EmitCodeAlignment(4);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addReg(Base)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addImm(Is8Bit ? 4 : 2)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ } else {
+ // TBB [base, idx] =
+ // LDRB idx, [base, idx] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Base)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
case ARM::tBR_JTr:
case ARM::BR_JTr: {
// Lower and emit the instruction itself, then the jump table following it.
@@ -1961,6 +2135,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
+ case ARM::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+ case ARM::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+ case ARM::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
}
MCInst TmpInst;
@@ -1975,8 +2158,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeARMAsmPrinter() {
- RegisterAsmPrinter<ARMAsmPrinter> X(TheARMLETarget);
- RegisterAsmPrinter<ARMAsmPrinter> Y(TheARMBETarget);
- RegisterAsmPrinter<ARMAsmPrinter> A(TheThumbLETarget);
- RegisterAsmPrinter<ARMAsmPrinter> B(TheThumbBETarget);
+ RegisterAsmPrinter<ARMAsmPrinter> X(getTheARMLETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> Y(getTheARMBETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> A(getTheThumbLETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> B(getTheThumbBETarget());
}
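
checkFunctionsAttributeConsistency (added above) gates the new module-level FP build attributes: Tag_ABI_FP_denormal and Tag_ABI_FP_exceptions may only reflect a function attribute such as "denormal-fp-math"="preserve-sign" when every function in the module carries that value, and an empty module trivially qualifies. A standalone restatement of the helper, using the same any_of idiom as the diff:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// True iff no function in M carries a different value for the given string
// attribute (vacuously true for a module with no functions at all).
static bool allFunctionsAgree(const llvm::Module &M, llvm::StringRef Attr,
                              llvm::StringRef Value) {
  return !llvm::any_of(M, [&](const llvm::Function &F) {
    return F.getFnAttribute(Attr).getValueAsString() != Value;
  });
}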
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 97f5ca0ecbc2..ce0b04d56d9e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -56,12 +56,22 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// -1 if uninitialized, 0 if conflicting goals
int OptimizationGoals;
+ /// List of globals that have had their storage promoted to a constant
+ /// pool. This lives between calls to runOnMachineFunction and collects
+ /// data from every MachineFunction. It is used during doFinalization
+ /// when all non-function globals are emitted.
+ SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+ /// Set of globals in PromotedGlobals that we've emitted labels for.
+ /// We need to emit labels even for promoted globals so that DWARF
+ /// debug info can link properly.
+ SmallPtrSet<const GlobalVariable*,2> EmittedPromotedGlobalLabels;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
- const char *getPassName() const override {
- return "ARM Assembly / Object Emitter";
+ StringRef getPassName() const override {
+ return "ARM Assembly Printer";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
@@ -90,11 +100,25 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
-
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
+
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+ //===------------------------------------------------------------------===//
+ // XRay implementation
+ //===------------------------------------------------------------------===//
+public:
+ // XRay-specific lowering for ARM.
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ // Helper function that emits the XRay sleds we've collected for a particular
+ // function.
+ void EmitXRayTable();
+
private:
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 693f16499717..70a3246e34f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -382,7 +382,10 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
-unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -406,11 +409,13 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -419,7 +424,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
@@ -448,7 +453,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
}
bool ARMBaseInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
return false;
@@ -575,6 +580,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
+ if (MI.isBundle())
+ return false;
+
if (!isEligibleForITBlock(&MI))
return false;
@@ -610,7 +618,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
/// GetInstSize - Return the size of the specified MachineInstr.
///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -669,7 +677,7 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
- Size += GetInstSizeInBytes(*I);
+ Size += getInstSizeInBytes(*I);
}
return Size;
}
@@ -868,7 +876,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -1051,7 +1059,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
@@ -2069,29 +2077,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
int RegListIdx = IsT1PushPop ? 2 : 4;
// Calculate the space we'll need in terms of registers.
- unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
- unsigned RD0Reg, RegsNeeded;
+ unsigned RegsNeeded;
+ const TargetRegisterClass *RegClass;
if (IsVFPPushPop) {
- RD0Reg = ARM::D0;
RegsNeeded = NumBytes / 8;
+ RegClass = &ARM::DPRRegClass;
} else {
- RD0Reg = ARM::R0;
RegsNeeded = NumBytes / 4;
+ RegClass = &ARM::GPRRegClass;
}
// We're going to have to strip all list operands off before
// re-adding them since the order matters, so save the existing ones
// for later.
SmallVector<MachineOperand, 4> RegList;
- for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
- RegList.push_back(MI->getOperand(i));
+
+ // We're also going to need the first register transferred by this
+ // instruction, which won't necessarily be the first register in the list.
+ unsigned FirstRegEnc = -1;
const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ RegList.push_back(MO);
+
+ if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
+ FirstRegEnc = TRI->getEncodingValue(MO.getReg());
+ }
+
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
// Now try to find enough space in the reglist to allocate NumBytes.
- for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
- --CurReg) {
+ for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
+ --CurRegEnc) {
+ unsigned CurReg = RegClass->getRegister(CurRegEnc);
if (!IsPop) {
// Pushing any register is completely harmless, mark the
// register involved as undef since we don't care about it in
@@ -2291,6 +2310,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
+ case ARM::tCMPi8:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = ~0;
@@ -2477,8 +2497,21 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (isPredicated(*MI))
return false;
+ bool IsThumb1 = false;
switch (MI->getOpcode()) {
default: break;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
case ARM::RSBrr:
case ARM::RSBri:
case ARM::RSCrr:
@@ -2511,7 +2544,11 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
case ARM::EORrr:
case ARM::EORri:
case ARM::t2EORrr:
- case ARM::t2EORri: {
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr: {
// Scan forward for the use of CPSR
// When checking against MI: if it's a conditional code that requires
// checking of the V bit or C bit, then this is not safe to do.
@@ -2618,9 +2655,12 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
}
- // Toggle the optional operand to CPSR.
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ }
assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
CmpInstr.eraseFromParent();
@@ -2632,7 +2672,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return true;
}
}
-
+
return false;
}
@@ -4119,6 +4159,9 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc,
unsigned LoadOpc) const {
+ assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
+ "ROPI/RWPI not currently supported with stack guard");
+
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Reg = MI->getOperand(0).getReg();
@@ -4132,7 +4175,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
if (Subtarget.isGVIndirectSymbol(GV)) {
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
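
One subtlety in the rewritten tryFoldSPUpdateIntoPushPop above: the register list of a push/pop is no longer assumed to begin with the lowest register, so the code scans for the smallest register encoding, initializing FirstRegEnc to (unsigned)-1 so that it wraps to UINT_MAX and the first candidate always replaces it. A minimal model of that idiom (illustration only):

#include <cassert>

// Running-minimum scan seeded with UINT_MAX via unsigned wraparound,
// matching the FirstRegEnc initialization in the diff above.
static unsigned lowestEncoding(const unsigned *Encodings, unsigned N) {
  unsigned First = -1; // wraps to UINT_MAX
  for (unsigned I = 0; I != N; ++I)
    if (Encodings[I] < First)
      First = Encodings[I];
  return First; // e.g. encodings {5, 2, 7} yield 2
}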
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 52b0ff17dea2..b01d5c8ec85f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -100,6 +100,10 @@ public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
+ virtual void getNoopForElfTarget(MCInst &NopInst) const {
+ getNoopForMachoTarget(NopInst);
+ }
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -124,13 +128,15 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
@@ -154,7 +160,7 @@ public:
/// GetInstSize - Returns the size of the specified MachineInstr.
///
- virtual unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index aa968efc37d4..d995c631dd1c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -49,18 +49,13 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
- if (STI.isTargetMachO())
- return ARM::R7;
- else if (STI.isTargetWindows())
- return ARM::R11;
- else // ARM EABI
- return STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
- bool UseSplitPush = STI.splitFramePushPop();
+ bool UseSplitPush = STI.splitFramePushPop(*MF);
const MCPhysReg *RegList =
STI.isTargetDarwin()
? CSR_iOS_SaveList
@@ -136,6 +131,15 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
return CSR_iOS_TLSCall_RegMask;
}
+const uint32_t *
+ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+ return CSR_NoRegs_RegMask;
+ else
+ return CSR_FPRegs_RegMask;
+}
+
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
@@ -163,27 +167,29 @@ getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- Reserved.set(ARM::SP);
- Reserved.set(ARM::PC);
- Reserved.set(ARM::FPSCR);
- Reserved.set(ARM::APSR_NZCV);
+ markSuperRegs(Reserved, ARM::SP);
+ markSuperRegs(Reserved, ARM::PC);
+ markSuperRegs(Reserved, ARM::FPSCR);
+ markSuperRegs(Reserved, ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(getFramePointerReg(STI));
+ markSuperRegs(Reserved, getFramePointerReg(STI));
if (hasBasePointer(MF))
- Reserved.set(BasePtr);
+ markSuperRegs(Reserved, BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
- Reserved.set(ARM::R9);
+ markSuperRegs(Reserved, ARM::R9);
// Reserve D16-D31 if the subtarget doesn't support them.
if (!STI.hasVFP3() || STI.hasD16()) {
static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
- Reserved.set(ARM::D16, ARM::D31 + 1);
+ for (unsigned R = 0; R < 16; ++R)
+ markSuperRegs(Reserved, ARM::D16 + R);
}
const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI)) Reserved.set(*I);
+ if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -289,8 +295,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
// First prefer the paired physreg.
- if (PairedPhys &&
- std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ if (PairedPhys && is_contained(Order, PairedPhys))
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
@@ -332,7 +337,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
}
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ARMFrameLowering *TFI = getFrameLowering(MF);
@@ -347,14 +352,14 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// It's going to be better to use the SP or Base Pointer instead. When there
// are variable sized objects, we can't reference off of the SP, so we
// reserve a Base Pointer.
- if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ if (AFI->isThumbFunction() && MFI.hasVarSizedObjects()) {
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
// space, so it's all more likely to be within range of the frame pointer.
// If it's wrong, the scavenger will still enable access to work, it just
// won't be optimal.
- if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ if (AFI->isThumb2Function() && MFI.getLocalFrameSize() < 128)
return false;
return true;
}
@@ -389,10 +394,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()
|| needsStackRealignment(MF);
}
@@ -536,7 +541,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
const ARMFrameLowering *TFI = getFrameLowering(MF);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Estimate an offset from the frame pointer.
@@ -551,7 +556,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
@@ -563,7 +568,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ !((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
@@ -572,7 +577,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
+ if (!MFI.hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -730,7 +735,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
- assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ assert(!MF.getFrameInfo().hasVarSizedObjects() &&
"Cannot use SP to access the emergency spill slot in "
"functions with variable sized frame objects");
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 1eee94857e05..330e1535e863 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -99,11 +99,12 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *
- getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
+ const uint32_t *getSjLjDispatchPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
new file mode 100644
index 000000000000..780544f865df
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -0,0 +1,110 @@
+//===-- ARMBasicBlockInfo.h - Basic Block Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions and data structure for computing block size.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// BasicBlockInfo - Information about the offset and size of a single
+/// basic block.
+struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = countTrailingZeros(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+};
+
+} // end namespace llvm
+
+#endif
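
A worked example of the padding arithmetic this new header centralizes: with LogAlign = 2 (the next block needs 4-byte alignment) and KnownBits = 1 (only the lowest offset bit is exact, so the offset mod 4 is 0 or 2), the worst case is 2 bytes of padding; once two low bits are known, the offset is already a multiple of 4 and no padding can appear. The function body below is copied from the header; the assertions are illustrative:

#include <cassert>

inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

int main() {
  assert(UnknownPadding(2, 1) == 2); // 4-byte align, 1 known bit: up to 2 bytes
  assert(UnknownPadding(2, 2) == 0); // offset already known to be 4-aligned
  return 0;
}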
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
new file mode 100644
index 000000000000..52c95b6244ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -0,0 +1,203 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARMCallLowering.h"
+
+#include "ARMBaseInstrInfo.h"
+#include "ARMISelLowering.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "This shouldn't be built without GISel"
+#endif
+
+ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+ Type *T) {
+ EVT VT = TLI.getValueType(DL, T);
+ if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+ return false;
+
+ unsigned VTSize = VT.getSimpleVT().getSizeInBits();
+ return VTSize == 8 || VTSize == 16 || VTSize == 32;
+}
+
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+ FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB)
+ : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ llvm_unreachable("Don't know how to get a stack address yet");
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
+ assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+ assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+ assert(VA.getLocInfo() != CCValAssign::SExt &&
+ VA.getLocInfo() != CCValAssign::ZExt &&
+ "ABI extensions not supported yet");
+
+ MIRBuilder.buildCopy(PhysReg, ValVReg);
+ MIB.addUse(PhysReg, RegState::Implicit);
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("Don't know how to assign a value to an address yet");
+ }
+
+ MachineInstrBuilder &MIB;
+};
+} // End anonymous namespace.
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p MIRBuilder's insertion point is correct.
+bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg,
+ MachineInstrBuilder &Ret) const {
+ if (!Val)
+ // Nothing to do here.
+ return true;
+
+ auto &MF = MIRBuilder.getMF();
+ const auto &F = *MF.getFunction();
+
+ auto &DL = MF.getDataLayout();
+ auto &TLI = *getTLI<ARMTargetLowering>();
+ if (!isSupportedType(DL, TLI, Val->getType()))
+ return false;
+
+ CCAssignFn *AssignFn =
+ TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
+
+ ArgInfo RetInfo(VReg, Val->getType());
+ setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
+
+ FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
+ return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+}
+
+bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg) const {
+ assert(!Val == !VReg && "Return value without a vreg");
+
+ auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+
+ if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+ return false;
+
+ MIRBuilder.insertInstr(Ret);
+ return true;
+}
+
+namespace {
+struct FormalArgHandler : public CallLowering::ValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : ValueHandler(MIRBuilder, MRI) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ assert(Size == 4 && "Unsupported size");
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+
+ unsigned AddrReg =
+ MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+
+ return AddrReg;
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ assert(Size == 4 && "Unsupported size");
+
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
+ assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+ assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+};
+} // End anonymous namespace
+
+bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
+ // Quick exit if there aren't any args
+ if (F.arg_empty())
+ return true;
+
+ if (F.isVarArg())
+ return false;
+
+ auto &DL = MIRBuilder.getMF().getDataLayout();
+ auto &TLI = *getTLI<ARMTargetLowering>();
+
+ auto &Args = F.getArgumentList();
+ unsigned ArgIdx = 0;
+ for (auto &Arg : Args) {
+ ArgIdx++;
+ if (!isSupportedType(DL, TLI, Arg.getType()))
+ return false;
+
+ // FIXME: This check as well as ArgIdx are going away as soon as we support
+ // loading values < 32 bits.
+ if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+ return false;
+ }
+
+ CCAssignFn *AssignFn =
+ TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
+
+ SmallVector<ArgInfo, 8> ArgInfos;
+ unsigned Idx = 0;
+ for (auto &Arg : Args) {
+ ArgInfo AInfo(VRegs[Idx], Arg.getType());
+ setArgFlags(AInfo, Idx + 1, DL, F);
+ ArgInfos.push_back(AInfo);
+ Idx++;
+ }
+
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
+ return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+}
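
Since the handlers above only accept integers of at most 32 bits, the calling-convention work they drive reduces to AAPCS's simplest rule: the first four 32-bit values travel in r0-r3, and the rest land in 4-byte stack slots (the Offset fed to getStackAddress). A toy model of that assignment, with all names invented:

#include <cstdio>
#include <vector>

// Toy AAPCS integer assignment: registers first, then 4-byte stack slots.
struct Loc { bool InReg; unsigned RegOrOffset; };

static std::vector<Loc> assignArgs(unsigned NumArgs) {
  std::vector<Loc> Locs;
  unsigned NextReg = 0, NextOffset = 0;
  for (unsigned I = 0; I != NumArgs; ++I) {
    if (NextReg < 4)
      Locs.push_back({true, NextReg++});
    else {
      Locs.push_back({false, NextOffset});
      NextOffset += 4;
    }
  }
  return Locs;
}

int main() {
  auto Locs = assignArgs(6);
  for (unsigned I = 0; I != Locs.size(); ++I)
    std::printf(Locs[I].InReg ? "arg%u -> r%u\n" : "arg%u -> [sp, #%u]\n",
                I, Locs[I].RegOrOffset);
  // arg0-arg3 -> r0-r3, arg4 -> [sp, #0], arg5 -> [sp, #4]
}
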
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
new file mode 100644
index 000000000000..6a1b886b501f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -0,0 +1,42 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+#define LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class ARMTargetLowering;
+class MachineInstrBuilder;
+
+class ARMCallLowering : public CallLowering {
+public:
+ ARMCallLowering(const ARMTargetLowering &TLI);
+
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ unsigned VReg) const override;
+
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<unsigned> VRegs) const override;
+
+private:
+ bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
+ unsigned VReg, MachineInstrBuilder &Ret) const;
+};
+} // End of namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index edb69581b9d3..9c278a52a7ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -242,6 +242,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
//===----------------------------------------------------------------------===//
def CSR_NoRegs : CalleeSavedRegs<(add)>;
+def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
new file mode 100644
index 000000000000..64f187d17e64
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -0,0 +1,72 @@
+//===--- ARMComputeBlockSize.cpp - Compute machine block sizes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBasicBlockInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// in ARMConstantIslandPass.cpp may shrink MI.
+static bool
+mayOptimizeThumb2Instruction(const MachineInstr *MI) {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
+ BasicBlockInfo &BBI) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ bool isThumb = MF->getInfo<ARMFunctionInfo>()->isThumbFunction();
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineInstr &I : *MBB) {
+ BBI.Size += TII->getInstSizeInBytes(I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I.isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(&I))
+ BBI.Unalign = 1;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->ensureAlignment(2);
+ }
+}
+
+std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
+ std::vector<BasicBlockInfo> BBInfo;
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(MF, &MBB, BBInfo[MBB.getNumber()]);
+
+ return BBInfo;
+}
+
+} // end namespace llvm
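
The block-size computation above reduces to a fold over instruction sizes plus two alignment flags. A standalone sketch under that reading (invented types, not the LLVM API):

#include <cstdio>

struct Inst { unsigned Size; bool InlineAsm; bool MayShrink; };
struct BlockInfo { unsigned Size = 0, Unalign = 0, PostAlign = 0; };

static BlockInfo computeBlockSize(const Inst *I, unsigned N, bool IsThumb,
                                  bool EndsInTBR_JTr) {
  BlockInfo BBI;
  for (unsigned K = 0; K != N; ++K) {
    BBI.Size += I[K].Size; // conservative size estimate for inline asm
    if (I[K].InlineAsm)
      BBI.Unalign = IsThumb ? 1 : 2;
    else if (IsThumb && I[K].MayShrink)
      BBI.Unalign = 1;
  }
  if (EndsInTBR_JTr)
    BBI.PostAlign = 2; // the terminator carries a .align 2 directive
  return BBI;
}

int main() {
  Inst Insts[] = {{2, false, false}, {4, false, true}, {8, true, false}};
  BlockInfo BI = computeBlockSize(Insts, 3, /*IsThumb=*/true, false);
  std::printf("Size=%u Unalign=%u PostAlign=%u\n", BI.Size, BI.Unalign,
              BI.PostAlign); // Size=14 Unalign=1 PostAlign=0
}
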
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 8511f67dccd5..be1a37e3e362 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb2InstrInfo.h"
@@ -57,18 +58,10 @@ static cl::opt<unsigned>
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
cl::desc("The max number of iteration for converge"));
-
-/// UnknownPadding - Return the worst case padding that could result from
-/// unknown offset bits. This does not include alignment padding caused by
-/// known offset bits.
-///
-/// @param LogAlign log2(alignment)
-/// @param KnownBits Number of known low offset bits.
-static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
- if (KnownBits < LogAlign)
- return (1u << LogAlign) - (1u << KnownBits);
- return 0;
-}
+static cl::opt<bool> SynthesizeThumb1TBB(
+ "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
+ cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
+ "equivalent to the TBB/TBH instructions"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -83,78 +76,6 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BasicBlockInfo - Information about the offset and size of a single
- /// basic block.
- struct BasicBlockInfo {
- /// Offset - Distance from the beginning of the function to the beginning
- /// of this basic block.
- ///
- /// Offsets are computed assuming worst case padding before an aligned
- /// block. This means that subtracting basic block offsets always gives a
- /// conservative estimate of the real distance which may be smaller.
- ///
- /// Because worst case padding is used, the computed offset of an aligned
- /// block may not actually be aligned.
- unsigned Offset;
-
- /// Size - Size of the basic block in bytes. If the block contains
- /// inline assembly, this is a worst case estimate.
- ///
- /// The size does not include any alignment padding whether from the
- /// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
-
- /// KnownBits - The number of low bits in Offset that are known to be
- /// exact. The remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
-
- /// Unalign - When non-zero, the block contains instructions (inline asm)
- /// of unknown size. The real size may be smaller than Size bytes by a
- /// multiple of 1 << Unalign.
- uint8_t Unalign;
-
- /// PostAlign - When non-zero, the block terminator contains a .align
- /// directive, so the end of the block is aligned to 1 << PostAlign
- /// bytes.
- uint8_t PostAlign;
-
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
- PostAlign(0) {}
-
- /// Compute the number of known offset bits internally to this block.
- /// This number should be used to predict worst case padding when
- /// splitting the block.
- unsigned internalKnownBits() const {
- unsigned Bits = Unalign ? Unalign : KnownBits;
- // If the block size isn't a multiple of the known bits, assume the
- // worst case padding.
- if (Size & ((1u << Bits) - 1))
- Bits = countTrailingZeros(Size);
- return Bits;
- }
-
- /// Compute the offset immediately following this block. If LogAlign is
- /// specified, return the offset the successor block will get if it has
- /// this alignment.
- unsigned postOffset(unsigned LogAlign = 0) const {
- unsigned PO = Offset + Size;
- unsigned LA = std::max(unsigned(PostAlign), LogAlign);
- if (!LA)
- return PO;
- // Add alignment padding from the terminator.
- return PO + UnknownPadding(LA, internalKnownBits());
- }
-
- /// Compute the number of known low bits of postOffset. If this block
- /// contains inline asm, the number of known bits drops to the
- /// instruction alignment. An aligned terminator may increase the number
- /// of know bits.
- /// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(unsigned LogAlign = 0) const {
- return std::max(std::max(unsigned(PostAlign), LogAlign),
- internalKnownBits());
- }
- };
std::vector<BasicBlockInfo> BBInfo;
@@ -273,6 +194,7 @@ namespace {
bool isThumb;
bool isThumb1;
bool isThumb2;
+ bool isPositionIndependentOrROPI;
public:
static char ID;
ARMConstantIslands() : MachineFunctionPass(ID) {}
@@ -281,10 +203,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
@@ -319,7 +241,6 @@ namespace {
bool fixupConditionalBr(ImmBranch &Br);
bool fixupUnconditionalBr(ImmBranch &Br);
bool undoLRSpillRestore();
- bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
@@ -330,7 +251,6 @@ namespace {
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- void computeBlockSize(MachineBasicBlock *MBB);
unsigned getOffsetOf(MachineInstr *MI) const;
unsigned getUserOffset(CPUser&) const;
void dumpBBs();
@@ -405,6 +325,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
TII = STI->getInstrInfo();
+ isPositionIndependentOrROPI =
+ STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
isThumb = AFI->isThumbFunction();
@@ -412,6 +334,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
// This pass invalidates liveness information when it splits basic blocks.
MF->getRegInfo().invalidateLiveness();
@@ -423,7 +346,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
- if (isThumb2 && AdjustJumpTableBlocks) {
+ if (GenerateTBB && AdjustJumpTableBlocks) {
scanFunctionJumpTables();
MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
@@ -500,7 +423,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MadeChange |= optimizeThumb2Branches();
// Optimize jump tables using TBB / TBH.
- if (isThumb2)
+ if (GenerateTBB && !STI->genExecuteOnly())
MadeChange |= optimizeThumb2JumpTables();
// After a while, this might be made debug-only, but it is not expensive.
@@ -626,9 +549,11 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
case ARM::t2BR_JT:
JTOpcode = ARM::JUMPTABLE_INSTS;
break;
+ case ARM::tTBB_JT:
case ARM::t2TBB_JT:
JTOpcode = ARM::JUMPTABLE_TBB;
break;
+ case ARM::tTBH_JT:
case ARM::t2TBH_JT:
JTOpcode = ARM::JUMPTABLE_TBH;
break;
@@ -668,7 +593,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
return false;
MachineBasicBlock *NextBB = &*std::next(MBBI);
- if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ if (!MBB->isSuccessor(NextBB))
return false;
// Try to analyze the end of the block. A potential fallthrough may already
@@ -701,8 +626,9 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
- return 0;
+ return isThumb1 ? 2 : 0;
case ARM::JUMPTABLE_TBH:
+ return isThumb1 ? 2 : 1;
case ARM::JUMPTABLE_INSTS:
return 1;
case ARM::JUMPTABLE_ADDRS:
@@ -724,7 +650,8 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
void ARMConstantIslands::scanFunctionJumpTables() {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &I : MBB)
- if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
+ if (I.isBranch() &&
+ (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
}
@@ -734,15 +661,8 @@ void ARMConstantIslands::scanFunctionJumpTables() {
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
- BBInfo.clear();
- BBInfo.resize(MF->getNumBlockIDs());
- // First thing, compute the size of all basic blocks, and see if the function
- // has any inline assembly in it. If so, we have to be conservative about
- // alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineBasicBlock &MBB : *MF)
- computeBlockSize(&MBB);
+ BBInfo = computeAllBlockSizes(MF);
// The known bits of the entry block offset are determined by the function
// alignment.
@@ -772,12 +692,13 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
default:
continue; // Ignore other JT branches
case ARM::t2BR_JT:
+ case ARM::tBR_JTr:
T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
UOpc = ARM::B;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::B:
Bits = 24;
Scale = 4;
@@ -860,6 +781,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
+ case ARM::t2LDRHpci:
Bits = 12; // +-offset_12
NegOk = true;
break;
@@ -875,6 +797,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 4; // +-(offset_8*4)
NegOk = true;
break;
+
+ case ARM::tLDRHi:
+ Bits = 5;
+ Scale = 2; // +(offset_5*2)
+ break;
}
// Remember that this is a user of a CP entry.
@@ -901,32 +828,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
}
}
-/// computeBlockSize - Compute the size and some alignment information for MBB.
-/// This function updates BBInfo directly.
-void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
- BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
- BBI.Size = 0;
- BBI.Unalign = 0;
- BBI.PostAlign = 0;
-
- for (MachineInstr &I : *MBB) {
- BBI.Size += TII->GetInstSizeInBytes(I);
- // For inline asm, GetInstSizeInBytes returns a conservative estimate.
- // The actual size may be smaller, but still a multiple of the instr size.
- if (I.isInlineAsm())
- BBI.Unalign = isThumb ? 1 : 2;
- // Also consider instructions that may be shrunk later.
- else if (isThumb && mayOptimizeThumb2Instruction(&I))
- BBI.Unalign = 1;
- }
-
- // tBR_JTr contains a .align 2 directive.
- if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
- BBI.PostAlign = 2;
- MBB->getParent()->ensureAlignment(2);
- }
-}
-
/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
@@ -941,7 +842,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(*I);
+ Offset += TII->getInstSizeInBytes(*I);
}
return Offset;
}
@@ -1034,11 +935,11 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- computeBlockSize(OrigBB);
+ computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- computeBlockSize(NewBB);
+ computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]);
// All BBOffsets following these blocks must be modified.
adjustBBOffsetsAfter(OrigBB);
@@ -1400,7 +1301,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- computeBlockSize(UserMBB);
+ computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]);
adjustBBOffsetsAfter(UserMBB);
return;
}
@@ -1449,7 +1350,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// iterates at least once.
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
- UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1);
+ UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1459,9 +1360,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
MachineInstr *LastIT = nullptr;
- for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI);
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) {
+ Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
CPUser &U = CPUsers[CPUIndex];
@@ -1551,7 +1452,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
- IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ IP = find(WaterList, WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1762,7 +1663,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
- int delta = TII->GetInstSizeInBytes(MBB->back());
+ int delta = TII->getInstSizeInBytes(MBB->back());
BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
@@ -1778,18 +1679,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
MI->eraseFromParent();
adjustBBOffsetsAfter(MBB);
return true;
@@ -1817,25 +1718,6 @@ bool ARMConstantIslands::undoLRSpillRestore() {
return MadeChange;
}
-// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
-// below may shrink MI.
-bool
-ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- // optimizeThumb2Instructions.
- case ARM::t2LEApcrel:
- case ARM::t2LDRpci:
- // optimizeThumb2Branches.
- case ARM::t2B:
- case ARM::t2Bcc:
- case ARM::tBcc:
- // optimizeThumb2JumpTables.
- case ARM::t2BR_JT:
- return true;
- }
- return false;
-}
-
bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
@@ -2075,7 +1957,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
if (RemovableAdd) {
RemovableAdd->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
} else if (BaseReg == EntryReg) {
// The add wasn't removable, but clobbered the base for the TBB. So we can't
// preserve it.
@@ -2142,25 +2024,82 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
if (!ByteOk && !HalfWordOk)
continue;
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
MachineBasicBlock *MBB = MI->getParent();
if (!MI->getOperand(0).isKill()) // FIXME: needed now?
continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
- CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
- bool PreservedBaseReg =
+
+ unsigned IdxReg = ~0U;
+ bool IdxRegKill = true;
+ if (isThumb2) {
+ IdxReg = MI->getOperand(1).getReg();
+ IdxRegKill = MI->getOperand(1).isKill();
+
+ bool PreservedBaseReg =
preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
+ } else {
+ // We're in thumb-1 mode, so we must have something like:
+ // %idx = tLSLri %idx, 2
+ // %base = tLEApcrelJT
+ // %t = tLDRr %idx, %base
+ unsigned BaseReg = User.MI->getOperand(0).getReg();
+
+ if (User.MI->getIterator() == User.MI->getParent()->begin())
+ continue;
+ MachineInstr *Shift = User.MI->getPrevNode();
+ if (Shift->getOpcode() != ARM::tLSLri ||
+ Shift->getOperand(3).getImm() != 2 ||
+ !Shift->getOperand(2).isKill())
+ continue;
+ IdxReg = Shift->getOperand(2).getReg();
+ unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
- if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
- continue;
+ MachineInstr *Load = User.MI->getNextNode();
+ if (Load->getOpcode() != ARM::tLDRr)
+ continue;
+ if (Load->getOperand(1).getReg() != ShiftedIdxReg ||
+ Load->getOperand(2).getReg() != BaseReg ||
+ !Load->getOperand(1).isKill())
+ continue;
+ // If we're in PIC mode, there should be another ADD following.
+ if (isPositionIndependentOrROPI) {
+ MachineInstr *Add = Load->getNextNode();
+ if (Add->getOpcode() != ARM::tADDrr ||
+ Add->getOperand(2).getReg() != Load->getOperand(0).getReg() ||
+ Add->getOperand(3).getReg() != BaseReg ||
+ !Add->getOperand(2).isKill())
+ continue;
+ if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+
+ Add->eraseFromParent();
+ DeadSize += 2;
+ } else {
+ if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+ }
+
+ // Now safe to delete the load and lsl. The LEA will be removed later.
+ CanDeleteLEA = true;
+ Shift->eraseFromParent();
+ Load->eraseFromParent();
+ DeadSize += 4;
+ }
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ if (!isThumb2)
+ Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;
+
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
@@ -2180,7 +2119,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
if (CanDeleteLEA) {
User.MI->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
// The LEA was eliminated, the TBB instruction becomes the only new user
// of the jump table.
@@ -2194,16 +2133,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// record the TBB or TBH use.
int CPEntryIdx = JumpTableEntryIndices[JTI];
auto &CPEs = CPEntries[CPEntryIdx];
- auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
- return E.CPEMI == User.CPEMI;
- });
+ auto Entry =
+ find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; });
++Entry->RefCount;
CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
}
}
- unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(*MI);
+ unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI);
+ unsigned OrigSize = TII->getInstSizeInBytes(*MI);
MI->eraseFromParent();
int Delta = OrigSize - NewSize + DeadSize;
@@ -2297,9 +2235,16 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
- assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
- BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
- .addImm(ARMCC::AL).addReg(0);
+ if (isThumb2)
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
+ else
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index c0db001cb6f1..2d1602873ce0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -46,7 +46,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
ARMConstantPoolValue::~ARMConstantPoolValue() {}
-const char *ARMConstantPoolValue::getModifierText() const {
+StringRef ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
@@ -60,6 +60,8 @@ const char *ARMConstantPoolValue::getModifierText() const {
return "gottpoff";
case ARMCP::TPOFF:
return "tpoff";
+ case ARMCP::SBREL:
+ return "SBREL";
case ARMCP::SECREL:
return "secrel32";
}
@@ -129,6 +131,12 @@ ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
AddCurrentAddress),
CVal(C) {}
+ARMConstantPoolConstant::ARMConstantPoolConstant(const GlobalVariable *GV,
+ const Constant *C)
+ : ARMConstantPoolValue((Type *)C->getType(), 0, ARMCP::CPPromotedGlobal, 0,
+ ARMCP::no_modifier, false),
+ CVal(C), GVar(GV) {}
+
ARMConstantPoolConstant *
ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
@@ -136,6 +144,12 @@ ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
}
ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalVariable *GVar,
+ const Constant *Initializer) {
+ return new ARMConstantPoolConstant(GVar, Initializer);
+}
+
+ARMConstantPoolConstant *
ARMConstantPoolConstant::Create(const GlobalValue *GV,
ARMCP::ARMCPModifier Modifier) {
return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
@@ -191,18 +205,17 @@ void ARMConstantPoolConstant::print(raw_ostream &O) const {
// ARMConstantPoolSymbol
//===----------------------------------------------------------------------===//
-ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
- unsigned id,
- unsigned char PCAdj,
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, StringRef s,
+ unsigned id, unsigned char PCAdj,
ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress)
- : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
- AddCurrentAddress),
- S(s) {}
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(s) {}
-ARMConstantPoolSymbol *
-ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
- unsigned ID, unsigned char PCAdj) {
+ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C,
+ StringRef s, unsigned ID,
+ unsigned char PCAdj) {
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index c07331d71dad..5f61832aa740 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -24,6 +24,7 @@ namespace llvm {
class BlockAddress;
class Constant;
class GlobalValue;
+class GlobalVariable;
class LLVMContext;
class MachineBasicBlock;
@@ -33,7 +34,8 @@ namespace ARMCP {
CPExtSymbol,
CPBlockAddress,
CPLSDA,
- CPMachineBasicBlock
+ CPMachineBasicBlock,
+ CPPromotedGlobal
};
enum ARMCPModifier {
@@ -43,6 +45,7 @@ namespace ARMCP {
GOTTPOFF, /// Global Offset Table, Thread Pointer Offset
TPOFF, /// Thread Pointer Offset
SECREL, /// Section Relative (Windows TLS)
+ SBREL, /// Static Base Relative (RWPI)
};
}
@@ -89,7 +92,7 @@ public:
~ARMConstantPoolValue() override;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
- const char *getModifierText() const;
+ StringRef getModifierText() const;
bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
@@ -102,7 +105,8 @@ public:
bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
-
+ bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
+
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
@@ -132,6 +136,7 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
/// Functions, and BlockAddresses.
class ARMConstantPoolConstant : public ARMConstantPoolValue {
const Constant *CVal; // Constant being loaded.
+ const GlobalVariable *GVar = nullptr;
ARMConstantPoolConstant(const Constant *C,
unsigned ID,
@@ -145,11 +150,14 @@ class ARMConstantPoolConstant : public ARMConstantPoolValue {
unsigned char PCAdj,
ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
+ ARMConstantPoolConstant(const GlobalVariable *GV, const Constant *Init);
public:
static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
static ARMConstantPoolConstant *Create(const GlobalValue *GV,
ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const GlobalVariable *GV,
+ const Constant *Initializer);
static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
ARMCP::ARMCPKind Kind,
unsigned char PCAdj);
@@ -161,6 +169,12 @@ public:
const GlobalValue *getGV() const;
const BlockAddress *getBlockAddress() const;
+ const GlobalVariable *getPromotedGlobal() const {
+ return dyn_cast_or_null<GlobalVariable>(GVar);
+ }
+ const Constant *getPromotedGlobalInit() const {
+ return CVal;
+ }
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
@@ -173,7 +187,8 @@ public:
void print(raw_ostream &O) const override;
static bool classof(const ARMConstantPoolValue *APV) {
- return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() ||
+ APV->isPromotedGlobal();
}
bool equals(const ARMConstantPoolConstant *A) const {
@@ -186,15 +201,15 @@ public:
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
const std::string S; // ExtSymbol being loaded.
- ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ ARMConstantPoolSymbol(LLVMContext &C, StringRef s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
- unsigned ID, unsigned char PCAdj);
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, StringRef s, unsigned ID,
+ unsigned char PCAdj);
- const char *getSymbol() const { return S.c_str(); }
+ StringRef getSymbol() const { return S; }
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
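
The widened classof above is what keeps dyn_cast working for promoted globals. A minimal standalone model of this LLVM RTTI pattern:

#include <cstdio>

// Each value carries a Kind tag; each class provides a static classof; the
// cast helper checks classof before downcasting.
enum Kind { CPValue, CPPromotedGlobal };

struct PoolValue {
  Kind K;
  explicit PoolValue(Kind K) : K(K) {}
};

struct PoolConstant : PoolValue {
  explicit PoolConstant(Kind K) : PoolValue(K) {}
  // classof must accept every kind this class can represent; omitting
  // CPPromotedGlobal here would make the cast below fail for promoted
  // globals, which is exactly what the diff's classof change avoids.
  static bool classof(const PoolValue *V) {
    return V->K == CPValue || V->K == CPPromotedGlobal;
  }
};

template <typename T> T *dynCast(PoolValue *V) {
  return T::classof(V) ? static_cast<T *>(V) : nullptr;
}

int main() {
  PoolConstant PG(CPPromotedGlobal);
  std::printf("promoted global casts: %s\n",
              dynCast<PoolConstant>(&PG) ? "yes" : "no");
}
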
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a7b299677c1c..95fcc8dcb453 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -53,10 +53,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM pseudo instruction expansion pass";
}
@@ -657,6 +657,9 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return true;
case MachineOperand::MO_CFIIndex:
return false;
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+ llvm_unreachable("should not exist post-isel");
}
llvm_unreachable("unhandled machine operand type");
}
@@ -1175,8 +1178,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI.getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 13724da5d4f7..df4dcb375750 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -112,11 +112,6 @@ class ARMFastISel final : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm);
unsigned fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
@@ -351,36 +346,6 @@ unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- // Make sure the input operands are sufficiently constrained to be legal
- // for this instruction.
- Op0 = constrainOperandRegClass(II, Op0, 1);
- Op1 = constrainOperandRegClass(II, Op1, 2);
- if (II.getNumDefs() >= 1) {
- AddOptionalDefs(
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm));
- } else {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm));
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(II.ImplicitDefs[0]));
- }
- return ResultReg;
-}
-
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
@@ -546,6 +511,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
+ // ROPI/RWPI not currently supported.
+ if (Subtarget->isROPI() || Subtarget->isRWPI())
+ return 0;
+
bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
: &ARM::GPRRegClass;
@@ -764,7 +733,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -1071,7 +1040,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
- } // Fallthrough here.
+ LLVM_FALLTHROUGH;
+ }
case MVT::i8:
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
@@ -1844,7 +1814,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
// For AAPCS ABI targets, just use VFP variant of the calling convention.
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
}
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CallingConv::C:
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
@@ -1863,6 +1833,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
// Fall through to soft float variant, variadic functions don't
// use hard floating point ABI.
+ LLVM_FALLTHROUGH;
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
@@ -2481,8 +2452,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::frameaddress: {
- MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
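
The getStructTypeOrNull change above sits in ARMComputeAddress's constant-offset folding, which walks a GEP accumulating struct field offsets (SL->getElementOffset(Idx)) and, elsewhere in the function, scaled array indices. A toy version with an invented layout:

#include <cstdio>

// struct S { char a; int b[4]; }; with 4-byte alignment, a is at 0, b at 4.
static const unsigned FieldOffset[] = {0, 4};

int main() {
  // Offset of s.b[2]: struct field offset plus index * element size.
  unsigned Off = 0;
  Off += FieldOffset[1]; // step into field b
  Off += 2 * 4;          // index 2 of the i32 array
  std::printf("offset = %u\n", Off); // 12
}
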
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index e8c9f610ea64..c72db8aca108 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -30,6 +30,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
+#define DEBUG_TYPE "arm-frame-lowering"
+
using namespace llvm;
static cl::opt<bool>
@@ -57,18 +59,16 @@ bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
- // iOS requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetIOS() || STI.isTargetWatchOS())
+ // ABI-required frame pointer.
+ if (MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Always eliminate non-leaf frame pointers.
- return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
- MFI->hasCalls()) ||
- RegInfo->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
+ // Frame pointer required for use within this function.
+ return (RegInfo->needsStackRealignment(MF) ||
+ MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken());
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -77,8 +77,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned CFSize = MFI.getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
// stack frame. ARM (especially Thumb) has small immediate offset to
// address the stack frame. So a large call frame can cause poor codegen
@@ -86,7 +86,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
return false;
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MFI.hasVarSizedObjects();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -95,7 +95,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}
static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
@@ -169,9 +169,9 @@ static int sizeOfSPAdjustment(const MachineInstr &MI) {
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
size_t StackSizeInBytes) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
+ unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
if (F->hasFnAttribute("stack-probe-size"))
F->getFnAttribute("stack-probe-size")
.getValueAsString()
@@ -196,22 +196,21 @@ struct StackAdjustingInsts {
}
void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
- auto Info = std::find_if(Insts.begin(), Insts.end(),
- [&](InstInfo &Info) { return Info.I == I; });
+ auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
assert(Info != Insts.end() && "invalid sp adjusting instruction");
Info->SPAdjust += ExtraBytes;
}
- void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
- const DebugLoc &dl, const ARMBaseInstrInfo &TII,
- bool HasFP) {
+ void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
+ const ARMBaseInstrInfo &TII, bool HasFP) {
+ MachineFunction &MF = *MBB.getParent();
unsigned CFAOffset = 0;
for (auto &Info : Insts) {
if (HasFP && !Info.BeforeFPSet)
return;
CFAOffset -= Info.SPAdjust;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, std::next(Info.I), dl,
TII.get(TargetOpcode::CFI_INSTRUCTION))
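
emitDefCFAOffsets above keeps a running CFA offset across the prologue's SP adjustments. A standalone model of that bookkeeping, assuming (as in LLVM of this vintage) that createDefCfaOffset negates its argument so the emitted directive carries the positive SP-to-CFA distance:

#include <cstdio>

int main() {
  // Adjustments recorded by addInst: e.g. push {r4-r7}, a vpush, then
  // sub sp, #64. The CFA stays fixed, so each adjustment moves it further
  // above the new SP.
  int SPAdjusts[] = {16, 8, 64};
  int CFAOffset = 0;
  for (int Adj : SPAdjusts) {
    CFAOffset -= Adj; // as in emitDefCFAOffsets above
    std::printf(".cfi_def_cfa_offset %d\n", -CFAOffset); // 16, 24, 88
  }
}
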
@@ -288,7 +287,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -301,8 +300,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
bool isARM = !AFI->isThumbFunction();
unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ unsigned NumBytes = MFI.getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -339,7 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
DefCFAOffsetCandidates.addInst(std::prev(MBBI),
NumBytes - ArgRegsSaveSize, true);
}
- DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
return;
}
@@ -353,11 +352,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
@@ -396,8 +395,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInPush = 0;
if (HasFP) {
FramePtrOffsetInPush =
- MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -414,7 +413,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset operations will reflect that.
if (DPRGapSize) {
assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
- if (tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
+ if (LastPush != MBB.end() &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
@@ -440,7 +440,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// leaves the stack pointer pointing to the DPRCS2 area.
//
// Adjust NumBytes to represent the stack slots below the DPRCS2 area.
- NumBytes += MFI->getObjectOffset(D8SpillFI);
+ NumBytes += MFI.getObjectOffset(D8SpillFI);
} else
NumBytes = DPRCSOffset;
@@ -526,7 +526,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
PushSize + FramePtrOffsetInPush,
MachineInstr::FrameSetup);
if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
-(ArgRegsSaveSize - FramePtrOffsetInPush)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -534,7 +534,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -557,9 +557,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop())
+ if (STI.splitFramePushPop(MF))
break;
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
@@ -569,8 +569,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R6:
case ARM::R7:
case ARM::LR:
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -590,10 +590,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned Offset = MFI->getObjectOffset(FI);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned Offset = MFI.getObjectOffset(FI);
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -614,8 +614,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned Offset = MFI->getObjectOffset(FI);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned Offset = MFI.getObjectOffset(FI);
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -628,11 +628,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// throughout the process. If we have a frame pointer, it takes over the job
// half-way through, so only the first few .cfi_def_cfa_offset instructions
// actually get emitted.
- DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
+ MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -644,7 +644,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// If aligned NEON registers were spilled, the stack has already been
// realigned.
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
assert(!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
@@ -688,13 +688,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMBaseInstrInfo &TII =
@@ -704,7 +704,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
bool isARM = !AFI->isThumbFunction();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- int NumBytes = (int)MFI->getStackSize();
+ int NumBytes = (int)MFI.getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// All calls are tail calls in GHC calling conv, and functions have no
@@ -753,7 +753,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// This is bad, if an interrupt is taken after the mov, sp is in an
// inconsistent state.
// Use the first callee-saved register as a scratch register.
- assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
@@ -776,11 +776,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
- if (AFI->getDPRCalleeSavedAreaSize()) {
+ if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
MBBI++;
// Since vpop register list cannot have gaps, there may be multiple vpop
// instructions in the epilogue.
- while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
MBBI++;
}
if (AFI->getDPRCalleeSavedGapSize()) {
@@ -811,13 +811,13 @@ int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
int SPAdj) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
int FPOffset = Offset - AFI->getFramePtrSpillOffset();
- bool isFixed = MFI->isFixedObjectIndex(FI);
+ bool isFixed = MFI.isFixedObjectIndex(FI);
FrameReg = ARM::SP;
Offset += SPAdj;
@@ -893,16 +893,18 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
DebugLoc DL;
- SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ typedef std::pair<unsigned, bool> RegAndKill;
+ SmallVector<RegAndKill, 4> Regs;
unsigned i = CSI.size();
while (i != 0) {
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.splitFramePushPop())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -927,6 +929,12 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
+
+ std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
+ const RegAndKill &RHS) {
+ return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
+ });
+
if (Regs.size() > 1 || StrOpc== 0) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
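The std::sort added in this hunk enforces that the register list handed to the store-multiple/push instruction is in ascending order of hardware encoding, which the ARM register-list encodings require regardless of the order in which the callee-saved list was collected. A standalone sketch of the invariant (outside this patch), with a stand-in encoding() in place of TargetRegisterInfo::getEncodingValue:

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Stand-in for illustration only: the real mapping comes from TRI.
    unsigned encoding(unsigned Reg) { return Reg; }

    void sortForPush(std::vector<std::pair<unsigned, bool>> &Regs) {
      std::sort(Regs.begin(), Regs.end(),
                [](const auto &L, const auto &R) {
                  return encoding(L.first) < encoding(R.first);
                });
    }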
@@ -960,6 +968,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
bool isTailCall = false;
@@ -983,7 +992,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.splitFramePushPop())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1012,6 +1021,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
+
+ std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
+ return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
+ });
+
if (Regs.size() > 1 || LdrOpc == 0) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
@@ -1062,7 +1076,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Mark the D-register spill slots as properly aligned. Since MFI computes
// stack slot layout backwards, this can actually mean that the d-reg stack
@@ -1104,7 +1118,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
.addReg(ARM::SP)
.addImm(8 * NumAlignedDPRCS2Regs)));
- unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
// We must set parameter MustBeSingleInstruction to true, since
// skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
// stack alignment. Luckily, this can always be done since all ARM
@@ -1359,7 +1373,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
unsigned FnSize = 0;
for (auto &MBB : MF) {
for (auto &MI : MBB)
- FnSize += TII.GetInstSizeInBytes(MI);
+ FnSize += TII.getInstSizeInBytes(MI);
}
return FnSize;
}
@@ -1485,8 +1499,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ (void)TRI; // Silence unused warning in non-assert builds.
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
@@ -1495,7 +1511,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// instruction.
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
- (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
SavedRegs.set(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
@@ -1509,8 +1525,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = MFI->estimateStackSize(MF);
- if (MFI->hasVarSizedObjects() || StackSize > 508)
+ unsigned StackSize = MFI.estimateStackSize(MF);
+ if (MFI.hasVarSizedObjects() || StackSize > 508)
SavedRegs.set(ARM::R4);
}
@@ -1547,7 +1563,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.splitFramePushPop()) {
+ if (!STI.splitFramePushPop(MF)) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -1558,7 +1574,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
switch (Reg) {
case ARM::LR:
LRSpilled = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0: case ARM::R1:
case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
@@ -1569,7 +1585,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
break;
}
} else {
- if (!STI.splitFramePushPop()) {
+ if (!STI.splitFramePushPop(MF)) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
@@ -1616,7 +1632,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
unsigned EstimatedStackSize =
- MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+ MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
if (hasFP(MF)) {
if (AFI->hasStackFrame())
EstimatedStackSize += 4;
@@ -1628,20 +1644,149 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   EstimatedStackSize += 16; // For possible padding.
bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI->hasVarSizedObjects() ||
- (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
+ if (hasFP(MF)) {
+ SavedRegs.set(FramePtr);
+ // If the frame pointer is required by the ABI, also spill LR so that we
+ // emit a complete frame record.
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ SavedRegs.set(ARM::LR);
+ LRSpilled = true;
+ NumGPRSpills++;
+ auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+ }
+ auto FPPos = find(UnspilledCS1GPRs, FramePtr);
+ if (FPPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(FPPos);
+ NumGPRSpills++;
+ if (FramePtr == ARM::R7)
+ CS1Spilled = true;
+ }
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // For Thumb1-only targets, we need some low registers when we save and
+ // restore the high registers (which aren't allocatable, but could be
+      // used by inline assembly) because the push/pop instructions cannot
+ // access high registers. If necessary, we might need to push more low
+ // registers to ensure that there is at least one free that can be used
+ // for the saving & restoring, and preferably we should ensure that as
+ // many as are needed are available so that fewer push/pop instructions
+ // are required.
+
+ // Low registers which are not currently pushed, but could be (r4-r7).
+ SmallVector<unsigned, 4> AvailableRegs;
+
+ // Unused argument registers (r0-r3) can be clobbered in the prologue for
+ // free.
+ int EntryRegDeficit = 0;
+ for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
+ if (!MF.getRegInfo().isLiveIn(Reg)) {
+ --EntryRegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is unused argument register, EntryRegDeficit = "
+ << EntryRegDeficit << "\n");
+ }
+ }
+
+ // Unused return registers can be clobbered in the epilogue for free.
+ int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
+ DEBUG(dbgs() << AFI->getReturnRegsCount()
+ << " return regs used, ExitRegDeficit = " << ExitRegDeficit
+ << "\n");
+
+ int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
+ DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
+
+ // r4-r6 can be used in the prologue if they are pushed by the first push
+ // instruction.
+ for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
+ if (SavedRegs.test(Reg)) {
+ --RegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is saved low register, RegDeficit = " << RegDeficit
+ << "\n");
+ } else {
+ AvailableRegs.push_back(Reg);
+ DEBUG(dbgs()
+ << PrintReg(Reg, TRI)
+ << " is non-saved low register, adding to AvailableRegs\n");
+ }
+ }
+
+ // r7 can be used if it is not being used as the frame pointer.
+ if (!hasFP(MF)) {
+ if (SavedRegs.test(ARM::R7)) {
+ --RegDeficit;
+ DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
+ << RegDeficit << "\n");
+ } else {
+ AvailableRegs.push_back(ARM::R7);
+ DEBUG(dbgs()
+ << "%R7 is non-saved low register, adding to AvailableRegs\n");
+ }
+ }
+
+ // Each of r8-r11 needs to be copied to a low register, then pushed.
+ for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
+ if (SavedRegs.test(Reg)) {
+ ++RegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is saved high register, RegDeficit = " << RegDeficit
+ << "\n");
+ }
+ }
+
+ // LR can only be used by PUSH, not POP, and can't be used at all if the
+ // llvm.returnaddress intrinsic is used. This is only worth doing if we
+ // are more limited at function entry than exit.
+ if ((EntryRegDeficit > ExitRegDeficit) &&
+ !(MF.getRegInfo().isLiveIn(ARM::LR) &&
+ MF.getFrameInfo().isReturnAddressTaken())) {
+ if (SavedRegs.test(ARM::LR)) {
+ --RegDeficit;
+ DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
+ << "\n");
+ } else {
+ AvailableRegs.push_back(ARM::LR);
+ DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
+ }
+ }
+
+ // If there are more high registers that need pushing than low registers
+ // available, push some more low registers so that we can use fewer push
+ // instructions. This might not reduce RegDeficit all the way to zero,
+ // because we can only guarantee that r4-r6 are available, but r8-r11 may
+ // need saving.
+ DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
+ for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
+ unsigned Reg = AvailableRegs.pop_back_val();
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up reg deficit\n");
+ SavedRegs.set(Reg);
+ NumGPRSpills++;
+ CS1Spilled = true;
+ ExtraCSSpill = true;
+ UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ }
+ DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n");
+ }
+
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
if (!LRSpilled && CS1Spilled) {
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
- LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
- (unsigned)ARM::LR);
+ LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
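As a worked example of the Thumb1 deficit heuristic above (register usage assumed for illustration): a function taking one argument in r0, returning in r0, and saving r8 and r9 starts with EntryRegDeficit = -3 (r1-r3 are free on entry) and ExitRegDeficit = 1 - 4 = -3, so RegDeficit begins at max(-3, -3) = -3; the two saved high registers raise it to -1, it never goes positive, and no extra low register is pushed. If instead all of r0-r3 are live-in, RegDeficit begins at 0, rises to 2 for r8 and r9, and the final loop pushes two available low registers from r4-r6 so the high registers can be copied down and pushed in the same sequence.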
@@ -1649,18 +1794,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ExtraCSSpill = true;
}
- if (hasFP(MF)) {
- SavedRegs.set(FramePtr);
- auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
- FramePtr);
- if (FPPos != UnspilledCS1GPRs.end())
- UnspilledCS1GPRs.erase(FPPos);
- NumGPRSpills++;
- }
-
// If stack and double are 8-byte aligned and we are spilling an odd number
// of GPRs, spill one extra callee save GPR so we won't have to pad between
// the integer and double callee save areas.
+ DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
unsigned TargetAlign = getStackAlignment();
if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
@@ -1672,6 +1809,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
(STI.isTargetWindows() && Reg == ARM::R11) ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
SavedRegs.set(Reg);
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up alignment\n");
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
@@ -1680,6 +1819,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
SavedRegs.set(Reg);
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up alignment\n");
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
@@ -1725,9 +1866,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// closest to SP or frame pointer.
assert(RS && "Register scavenging not provided");
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
}
}
@@ -1855,7 +1996,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
@@ -1864,7 +2005,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
// Do not generate a prologue for functions with a stack of size zero
if (StackSize == 0)
@@ -1951,14 +2092,14 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the relevant DWARF information about the change in stack pointer as
// well as where to find both r4 and r5 (the callee-save registers)
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2069,10 +2210,10 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the DWARF info about the change in stack as well as where to find the
// previous link register
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2124,7 +2265,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2147,17 +2288,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Tell debuggers that r4 and r5 are now the same as they were in the
// previous function, that they're the "Same Value".
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 20db3d39bcae..c3e9591d5c70 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -70,9 +70,7 @@ public:
return true;
}
- const char *getPassName() const override {
- return "ARM Instruction Selection";
- }
+ StringRef getPassName() const override { return "ARM Instruction Selection"; }
void PreprocessISelDAG() override;
@@ -193,6 +191,8 @@ public:
#include "ARMGenDAGISel.inc"
private:
+ void transferMemOperands(SDNode *Src, SDNode *Dst);
+
/// Indexed (pre/post inc/dec) load matching code for ARM.
bool tryARMIndexedLoad(SDNode *N);
bool tryT1IndexedLoad(SDNode *N);
@@ -222,10 +222,11 @@ private:
const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
- /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes = nullptr);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -244,6 +245,7 @@ private:
bool tryInlineAsm(SDNode *N);
void SelectConcatVector(SDNode *N);
+ void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
bool trySMLAWSMULW(SDNode *N);
@@ -476,7 +478,9 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
if (Subtarget->isThumb()) {
if (Val <= 255) return 1; // MOV
- if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (Subtarget->hasV6T2Ops() &&
+ (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
+ return 1; // MOVW
if (Val <= 510) return 2; // MOV + ADDi8
if (~Val <= 255) return 2; // MOV + MVN
if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
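The new getT2SOImmValSplatVal check above counts 32-bit values matching a Thumb2 modified-immediate splat pattern (0x00XY00XY, 0xXY00XY00 or 0xXYXYXYXY) as a single instruction, the same cost as a MOVW of a 16-bit value. A hedged standalone sketch of that splat test (an assumed mirror for illustration, not the ARM_AM helper's actual implementation):

    #include <cstdint>

    // Returns the splatted byte, or -1 if Val matches none of the three
    // Thumb2 modified-immediate splat forms.
    int t2SplatByte(uint32_t Val) {
      uint32_t B = Val & 0xff;
      if (Val == (B | (B << 16)))
        return (int)B;                                  // 0x00XY00XY
      if (Val == (B | (B << 8) | (B << 16) | (B << 24)))
        return (int)B;                                  // 0xXYXYXYXY
      B = (Val >> 8) & 0xff;
      if (Val == ((B << 8) | (B << 24)))
        return (int)B;                                  // 0xXY00XY00
      return -1;
    }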
@@ -1186,6 +1190,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
+ N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
@@ -1232,9 +1237,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// Only multiples of 4 are allowed for the offset, so the frame object
// alignment must be at least 4.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
@@ -1255,9 +1260,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
@@ -1469,6 +1474,12 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
+void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+}
+
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
@@ -1527,16 +1538,20 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
}
@@ -1548,8 +1563,8 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
ISD::MemIndexedMode AM = LD->getAddressingMode();
- if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
- AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
+ LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
return false;
auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
@@ -1564,8 +1579,10 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
- MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
@@ -1610,8 +1627,10 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
- MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
@@ -1744,6 +1763,12 @@ static bool isVLDfixed(unsigned Opc)
case ARM::VLD1q16wb_fixed : return true;
case ARM::VLD1q32wb_fixed : return true;
case ARM::VLD1q64wb_fixed : return true;
+ case ARM::VLD1DUPd8wb_fixed : return true;
+ case ARM::VLD1DUPd16wb_fixed : return true;
+ case ARM::VLD1DUPd32wb_fixed : return true;
+ case ARM::VLD1DUPq8wb_fixed : return true;
+ case ARM::VLD1DUPq16wb_fixed : return true;
+ case ARM::VLD1DUPq32wb_fixed : return true;
case ARM::VLD2d8wb_fixed : return true;
case ARM::VLD2d16wb_fixed : return true;
case ARM::VLD2d32wb_fixed : return true;
@@ -1798,6 +1823,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
+ case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
+ case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
+ case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
+ case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
+ case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
+ case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
@@ -2140,7 +2171,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
@@ -2238,8 +2269,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
}
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
@@ -2255,7 +2287,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
@@ -2267,19 +2299,21 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
}
Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
- unsigned OpcodeIndex;
+ unsigned Opc;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld-dup type");
- case MVT::v8i8: OpcodeIndex = 0; break;
- case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v8i8: Opc = DOpcodes[0]; break;
+ case MVT::v16i8: Opc = QOpcodes[0]; break;
+ case MVT::v4i16: Opc = DOpcodes[1]; break;
+ case MVT::v8i16: Opc = QOpcodes[1]; break;
case MVT::v2f32:
- case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v2i32: Opc = DOpcodes[2]; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = QOpcodes[2]; break;
}
SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SDValue SuperReg;
- unsigned Opc = Opcodes[OpcodeIndex];
SmallVector<SDValue, 6> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
@@ -2287,6 +2321,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
if (!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
@@ -2305,14 +2341,18 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
ResTys.push_back(MVT::Other);
SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
- SuperReg = SDValue(VLdDup, 0);
// Extract the subregisters.
- static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
- unsigned SubIdx = ARM::dsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ if (NumVecs == 1) {
+ ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
+ } else {
+ SDValue SuperReg = SDValue(VLdDup, 0);
+ static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ }
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
@@ -2612,6 +2652,10 @@ static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
}
bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
+ if (!Subtarget->hasV6Ops() ||
+ (Subtarget->isThumb() && !Subtarget->hasThumb2()))
+ return false;
+
SDLoc dl(N);
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
@@ -2687,6 +2731,87 @@ void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
}
+static Optional<std::pair<unsigned, unsigned>>
+getContiguousRangeOfSetBits(const APInt &A) {
+ unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
+ unsigned LastOne = A.countTrailingZeros();
+ if (A.countPopulation() != (FirstOne - LastOne + 1))
+ return Optional<std::pair<unsigned,unsigned>>();
+ return std::make_pair(FirstOne, LastOne);
+}
+
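For concreteness, a standalone model of getContiguousRangeOfSetBits using plain 32-bit values (outside this patch), with the C++20 <bit> helpers standing in for the APInt methods; the zero guard is an addition here, since 32 leading zeros would make FirstOne wrap:

    #include <bit>
    #include <cstdint>
    #include <optional>
    #include <utility>

    std::optional<std::pair<unsigned, unsigned>>
    contiguousSetBits(uint32_t A) {
      if (A == 0)
        return std::nullopt;
      unsigned FirstOne = 31u - std::countl_zero(A); // highest set bit
      unsigned LastOne = std::countr_zero(A);        // lowest set bit
      if ((unsigned)std::popcount(A) != FirstOne - LastOne + 1)
        return std::nullopt;                         // the mask has holes
      return std::make_pair(FirstOne, LastOne);
    }
    // contiguousSetBits(0x0FF0) == {11, 4}; 0x0F0F has holes, so nullopt.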
+void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
+ assert(N->getOpcode() == ARMISD::CMPZ);
+ SwitchEQNEToPLMI = false;
+
+ if (!Subtarget->isThumb())
+ // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
+ // LSR don't exist as standalone instructions - they need the barrel shifter.
+ return;
+
+ // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
+ SDValue And = N->getOperand(0);
+ if (!And->hasOneUse())
+ return;
+
+ SDValue Zero = N->getOperand(1);
+ if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
+ And->getOpcode() != ISD::AND)
+ return;
+ SDValue X = And.getOperand(0);
+ auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
+
+ if (!C || !X->hasOneUse())
+ return;
+ auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
+ if (!Range)
+ return;
+
+ // There are several ways to lower this:
+ SDNode *NewN;
+ SDLoc dl(N);
+
+ auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
+ if (Subtarget->isThumb2()) {
+ Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
+ SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
+ CurDAG->getTargetConstant(Imm, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ }
+ };
+
+ if (Range->second == 0) {
+ // 1. Mask includes the LSB -> Simply shift the top N bits off
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ ReplaceNode(And.getNode(), NewN);
+ } else if (Range->first == 31) {
+ // 2. Mask includes the MSB -> Simply shift the bottom N bits off
+ NewN = EmitShift(ARM::tLSRri, X, Range->second);
+ ReplaceNode(And.getNode(), NewN);
+ } else if (Range->first == Range->second) {
+ // 3. Only one bit is set. We can shift this into the sign bit and use a
+ // PL/MI comparison.
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ ReplaceNode(And.getNode(), NewN);
+
+ SwitchEQNEToPLMI = true;
+ } else if (!Subtarget->hasV6T2Ops()) {
+ // 4. Do a double shift to clear bottom and top bits, but only in
+ // thumb-1 mode as in thumb-2 we can use UBFX.
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
+ Range->second + (31 - Range->first));
+ ReplaceNode(And.getNode(), NewN);
+ }
+
+}
+
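Concretely (masks chosen for illustration): on Thumb-1, (CMPZ (and X, 0x000000FF), 0) becomes a single LSLS by 24, since the mask includes bit 0 and the bits shifted off the top cannot affect the Z flag; (CMPZ (and X, 0x00000FF0), 0) becomes LSLS by 20 followed by LSRS by 24, clearing first the bits above bit 11 and then the bits below bit 4; and a single-bit mask such as 0x00000020 becomes LSLS by 26, which parks bit 5 in the sign bit, after which SwitchEQNEToPLMI tells the caller to rewrite EQ into PL and NE into MI.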
void ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
@@ -2761,9 +2886,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb1Only()) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i32));
return;
@@ -2914,6 +3039,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+
break;
}
case ARMISD::VMOVRRD:
@@ -2971,7 +3097,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::UMLAL:{
// UMAAL is similar to UMLAL but it adds two 32-bit values to the
// 64-bit multiplication result.
- if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
+ if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
+ N->getOperand(2).getOpcode() == ARMISD::ADDC &&
N->getOperand(3).getOpcode() == ARMISD::ADDE) {
SDValue Addc = N->getOperand(2);
@@ -3037,6 +3164,37 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+ case ARMISD::SUBE: {
+ if (!Subtarget->hasV6Ops())
+ break;
+ // Look for a pattern to match SMMLS
+    // (sube a, (smul_lohi a, b).hi, (subc 0, (smul_lohi a, b).lo))
+ if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
+ N->getOperand(2).getOpcode() != ARMISD::SUBC ||
+ !SDValue(N, 1).use_empty())
+ break;
+
+ if (Subtarget->isThumb())
+ assert(Subtarget->hasThumb2() &&
+ "This pattern should not be generated for Thumb");
+
+ SDValue SmulLoHi = N->getOperand(1);
+ SDValue Subc = N->getOperand(2);
+ auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
+
+ if (!Zero || Zero->getZExtValue() != 0 ||
+ Subc.getOperand(1) != SmulLoHi.getValue(0) ||
+ N->getOperand(1) != SmulLoHi.getValue(1) ||
+ N->getOperand(2) != Subc.getValue(1))
+ break;
+
+ unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
+ SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
+ N->getOperand(0), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
+ return;
+ }
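The SUBE/SUBC pair matched above computes the high word of ((int64_t)a << 32) minus the full 64-bit product, which is exactly what SMMLS produces. A hedged scalar model of that value (outside this patch):

    #include <cstdint>

    // Model of SMMLS Rd, Rn, Rm, Ra. Arithmetic behavior of the signed
    // shifts is assumed (guaranteed since C++20).
    int32_t smmlsModel(int32_t Ra, int32_t Rn, int32_t Rm) {
      int64_t Acc = (int64_t)Ra << 32;  // Ra in the high word, zero below
      return (int32_t)((Acc - (int64_t)Rn * Rm) >> 32);
    }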
case ISD::LOAD: {
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
if (tryT2IndexedLoad(N))
@@ -3073,9 +3231,27 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()), dl,
- MVT::i32);
+ unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
+
+ if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ bool SwitchEQNEToPLMI;
+ SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
+ InFlag = N->getOperand(4);
+
+ if (SwitchEQNEToPLMI) {
+ switch ((ARMCC::CondCodes)CC) {
+ default: llvm_unreachable("CMPZ must be either NE or EQ!");
+ case ARMCC::NE:
+ CC = (unsigned)ARMCC::MI;
+ break;
+ case ARMCC::EQ:
+ CC = (unsigned)ARMCC::PL;
+ break;
+ }
+ }
+ }
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Glue, Ops);
@@ -3089,6 +3265,80 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
+
+ case ARMISD::CMPZ: {
+ // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
+ // This allows us to avoid materializing the expensive negative constant.
+ // The CMPZ #0 is useless and will be peepholed away but we need to keep it
+ // for its glue output.
+ SDValue X = N->getOperand(0);
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
+ if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
+ int64_t Addend = -C->getSExtValue();
+
+ SDNode *Add = nullptr;
+ // In T2 mode, ADDS can be better than CMN if the immediate fits in a
+ // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
+ // Outside that range we can just use a CMN which is 32-bit but has a
+ // 12-bit immediate range.
+ if (Subtarget->isThumb2() && Addend < 1<<8) {
+ SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
+ } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
+ // FIXME: Add T1 tADDi8 code.
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
+ } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
+ }
+ if (Add) {
+ SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
+ CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
+ }
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+
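For example (constant chosen for illustration): CMPZ X, #-42 on Thumb-1 would need a MOVS/MVNS pair just to materialize -42, while the rewrite emits a single flag-setting tADDi8 of #42. ADDS and CMN compute the same sum, so all of NZCV come out the same, and the leftover CMPZ #0 exists only to carry the glue. The identity relied on, as a one-liner:

    #include <cstdint>
    // Z-flag equivalence in modular 32-bit arithmetic:
    bool eqNegC(uint32_t X, uint32_t C) { return X + C == 0; } // X == 0u - C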
+ case ARMISD::CMOV: {
+ SDValue InFlag = N->getOperand(4);
+
+ if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ bool SwitchEQNEToPLMI;
+ SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
+
+ if (SwitchEQNEToPLMI) {
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ switch (CC) {
+ default: llvm_unreachable("CMPZ must be either NE or EQ!");
+ case ARMCC::NE:
+ CC = ARMCC::MI;
+ break;
+ case ARMCC::EQ:
+ CC = ARMCC::PL;
+ break;
+ }
+ SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
+ N->getOperand(3), N->getOperand(4)};
+ CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
+ }
+
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -3174,6 +3424,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case ARMISD::VLD1DUP: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
+ ARM::VLD1DUPd32 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
+ ARM::VLD1DUPq32 };
+ SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
+ return;
+ }
+
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
@@ -3197,6 +3456,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case ARMISD::VLD1DUP_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
+ ARM::VLD1DUPd16wb_fixed,
+ ARM::VLD1DUPd32wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
+ ARM::VLD1DUPq16wb_fixed,
+ ARM::VLD1DUPq32wb_fixed };
+ SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
+ return;
+ }
+
case ARMISD::VLD2DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
@@ -4383,7 +4653,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_i:
// FIXME: It seems strange that 'i' is needed here since it's supposed to
// be an immediate and not a memory constraint.
- // Fallthrough.
+ LLVM_FALLTHROUGH;
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3cfcb1e09f0b..afba1587a743 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -59,18 +60,27 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
+STATISTIC(NumConstpoolPromoted,
+ "Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-// Disabled for causing self-hosting failures once returned-attribute inference
-// was enabled.
-static cl::opt<bool>
-EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden,
- cl::desc("Directly forward this return"),
- cl::init(false));
+static cl::opt<bool> EnableConstpoolPromotion(
+ "arm-promote-constant", cl::Hidden,
+ cl::desc("Enable / disable promotion of unnamed_addr constants into "
+ "constant pools"),
+ cl::init(true));
+static cl::opt<unsigned> ConstpoolPromotionMaxSize(
+ "arm-promote-constant-max-size", cl::Hidden,
+ cl::desc("Maximum size of constant to promote into a constant pool"),
+ cl::init(64));
+static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
+ "arm-promote-constant-max-total", cl::Hidden,
+ cl::desc("Maximum size of ALL constants to promote into a constant pool"),
+ cl::init(128));
namespace {
class ARMCCState : public CCState {
@@ -87,6 +97,171 @@ namespace {
};
}
+void ARMTargetLowering::InitLibcallCallingConvs() {
+  // The builtins on ARM always use AAPCS, irrespective of whether C is AAPCS or
+ // AAPCS_VFP.
+ for (const auto LC : {
+ RTLIB::SHL_I16,
+ RTLIB::SHL_I32,
+ RTLIB::SHL_I64,
+ RTLIB::SHL_I128,
+ RTLIB::SRL_I16,
+ RTLIB::SRL_I32,
+ RTLIB::SRL_I64,
+ RTLIB::SRL_I128,
+ RTLIB::SRA_I16,
+ RTLIB::SRA_I32,
+ RTLIB::SRA_I64,
+ RTLIB::SRA_I128,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16,
+ RTLIB::MUL_I32,
+ RTLIB::MUL_I64,
+ RTLIB::MUL_I128,
+ RTLIB::MULO_I32,
+ RTLIB::MULO_I64,
+ RTLIB::MULO_I128,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16,
+ RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64,
+ RTLIB::SDIV_I128,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16,
+ RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64,
+ RTLIB::UDIV_I128,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16,
+ RTLIB::SREM_I32,
+ RTLIB::SREM_I64,
+ RTLIB::SREM_I128,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16,
+ RTLIB::UREM_I32,
+ RTLIB::UREM_I64,
+ RTLIB::UREM_I128,
+ RTLIB::SDIVREM_I8,
+ RTLIB::SDIVREM_I16,
+ RTLIB::SDIVREM_I32,
+ RTLIB::SDIVREM_I64,
+ RTLIB::SDIVREM_I128,
+ RTLIB::UDIVREM_I8,
+ RTLIB::UDIVREM_I16,
+ RTLIB::UDIVREM_I32,
+ RTLIB::UDIVREM_I64,
+ RTLIB::UDIVREM_I128,
+ RTLIB::NEG_I32,
+ RTLIB::NEG_I64,
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::FPEXT_F64_F128,
+ RTLIB::FPEXT_F32_F128,
+ RTLIB::FPEXT_F32_F64,
+ RTLIB::FPEXT_F16_F32,
+ RTLIB::FPROUND_F32_F16,
+ RTLIB::FPROUND_F64_F16,
+ RTLIB::FPROUND_F80_F16,
+ RTLIB::FPROUND_F128_F16,
+ RTLIB::FPROUND_F64_F32,
+ RTLIB::FPROUND_F80_F32,
+ RTLIB::FPROUND_F128_F32,
+ RTLIB::FPROUND_F80_F64,
+ RTLIB::FPROUND_F128_F64,
+ RTLIB::FPTOSINT_F32_I32,
+ RTLIB::FPTOSINT_F32_I64,
+ RTLIB::FPTOSINT_F32_I128,
+ RTLIB::FPTOSINT_F64_I32,
+ RTLIB::FPTOSINT_F64_I64,
+ RTLIB::FPTOSINT_F64_I128,
+ RTLIB::FPTOSINT_F80_I32,
+ RTLIB::FPTOSINT_F80_I64,
+ RTLIB::FPTOSINT_F80_I128,
+ RTLIB::FPTOSINT_F128_I32,
+ RTLIB::FPTOSINT_F128_I64,
+ RTLIB::FPTOSINT_F128_I128,
+ RTLIB::FPTOUINT_F32_I32,
+ RTLIB::FPTOUINT_F32_I64,
+ RTLIB::FPTOUINT_F32_I128,
+ RTLIB::FPTOUINT_F64_I32,
+ RTLIB::FPTOUINT_F64_I64,
+ RTLIB::FPTOUINT_F64_I128,
+ RTLIB::FPTOUINT_F80_I32,
+ RTLIB::FPTOUINT_F80_I64,
+ RTLIB::FPTOUINT_F80_I128,
+ RTLIB::FPTOUINT_F128_I32,
+ RTLIB::FPTOUINT_F128_I64,
+ RTLIB::FPTOUINT_F128_I128,
+ RTLIB::SINTTOFP_I32_F32,
+ RTLIB::SINTTOFP_I32_F64,
+ RTLIB::SINTTOFP_I32_F80,
+ RTLIB::SINTTOFP_I32_F128,
+ RTLIB::SINTTOFP_I64_F32,
+ RTLIB::SINTTOFP_I64_F64,
+ RTLIB::SINTTOFP_I64_F80,
+ RTLIB::SINTTOFP_I64_F128,
+ RTLIB::SINTTOFP_I128_F32,
+ RTLIB::SINTTOFP_I128_F64,
+ RTLIB::SINTTOFP_I128_F80,
+ RTLIB::SINTTOFP_I128_F128,
+ RTLIB::UINTTOFP_I32_F32,
+ RTLIB::UINTTOFP_I32_F64,
+ RTLIB::UINTTOFP_I32_F80,
+ RTLIB::UINTTOFP_I32_F128,
+ RTLIB::UINTTOFP_I64_F32,
+ RTLIB::UINTTOFP_I64_F64,
+ RTLIB::UINTTOFP_I64_F80,
+ RTLIB::UINTTOFP_I64_F128,
+ RTLIB::UINTTOFP_I128_F32,
+ RTLIB::UINTTOFP_I128_F64,
+ RTLIB::UINTTOFP_I128_F80,
+ RTLIB::UINTTOFP_I128_F128,
+ RTLIB::OEQ_F32,
+ RTLIB::OEQ_F64,
+ RTLIB::OEQ_F128,
+ RTLIB::UNE_F32,
+ RTLIB::UNE_F64,
+ RTLIB::UNE_F128,
+ RTLIB::OGE_F32,
+ RTLIB::OGE_F64,
+ RTLIB::OGE_F128,
+ RTLIB::OLT_F32,
+ RTLIB::OLT_F64,
+ RTLIB::OLT_F128,
+ RTLIB::OLE_F32,
+ RTLIB::OLE_F64,
+ RTLIB::OLE_F128,
+ RTLIB::OGT_F32,
+ RTLIB::OGT_F64,
+ RTLIB::OGT_F128,
+ RTLIB::UO_F32,
+ RTLIB::UO_F64,
+ RTLIB::UO_F128,
+ RTLIB::O_F32,
+ RTLIB::O_F64,
+ RTLIB::O_F128,
+ })
+ setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
+}
+
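One practical effect of forcing AAPCS here (stated from the ARM RTABI's definition of the runtime helpers, not from this patch): even when the surrounding code is compiled hard-float (AAPCS-VFP), calls to these helpers pass and return floating-point values in core registers, so for instance a double argument travels in r0/r1 rather than d0, matching how the __aeabi_* routines are declared.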
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -103,7 +278,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
}
MVT ElemTy = VT.getVectorElementType();
- if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ if (ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -174,6 +349,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ InitLibcallCallingConvs();
+
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
@@ -565,8 +742,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, and nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
@@ -803,19 +978,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) {
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
+ Subtarget->isTargetWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
- setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+ for (const auto &LC :
+ {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32})
+ setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv"
+ : "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows()
+ ? "__rt_sdiv64"
+ : "__aeabi_ldivmod");
+ for (const auto &LC :
+ {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32})
+ setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv"
+ : "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows()
+ ? "__rt_udiv64"
+ : "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
@@ -835,6 +1017,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
+ if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
+ for (auto &VT : {MVT::f32, MVT::f64})
+ setOperationAction(ISD::FPOWI, VT, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -875,6 +1061,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
+ // If target has DMB in thumb, Fences can be inserted.
+    // If the target has DMB in Thumb mode, fences can be inserted.
+ InsertFencesForAtomic = true;
+
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
@@ -893,8 +1083,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ if (!InsertFencesForAtomic) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ }
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -1177,7 +1369,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
- case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+ case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
@@ -1236,6 +1428,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
+ case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -1246,6 +1439,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
@@ -1429,6 +1623,16 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
}
}
+CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool isVarArg) const {
+ return CCAssignFnForNode(CC, false, isVarArg);
+}
+
+CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool isVarArg) const {
+ return CCAssignFnForNode(CC, true, isVarArg);
+}
+
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
@@ -1464,9 +1668,7 @@ SDValue ARMTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext(), Call);
- CCInfo.AnalyzeCallResult(Ins,
- CCAssignFnForNode(CallConv, /* Return*/ true,
- isVarArg));
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1474,7 +1676,7 @@ SDValue ARMTargetLowering::LowerCallResult(
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
- if (i == 0 && isThisReturn && EnableThisRetForwarding) {
+ if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
@@ -1627,9 +1829,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), Call);
- CCInfo.AnalyzeCallOperands(Outs,
- CCAssignFnForNode(CallConv, /* Return*/ false,
- isVarArg));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1864,7 +2064,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
auto *BB = CLI.CS->getParent();
bool PreferIndirect =
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
- std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
+ count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
@@ -1880,10 +2080,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
- Callee =
- DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 0, MachineMemOperand::MOInvariant);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1977,7 +2178,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
- MF.getFrameInfo()->setHasTailCall();
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -2060,9 +2261,9 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
@@ -2094,9 +2295,9 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return false;
assert(FI != INT_MAX);
- if (!MFI->isFixedObjectIndex(FI))
+ if (!MFI.isFixedObjectIndex(FI))
return false;
- return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+ return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
@@ -2121,11 +2322,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
- // Do not sibcall optimize vararg calls unless the call site is not passing
- // any arguments.
- if (isVarArg && !Outs.empty())
- return false;
-
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
@@ -2155,8 +2351,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check that the call results are passed in the same way.
LLVMContext &C = *DAG.getContext();
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
- CCAssignFnForNode(CalleeCC, true, isVarArg),
- CCAssignFnForNode(CallerCC, true, isVarArg)))
+ CCAssignFnForReturn(CalleeCC, isVarArg),
+ CCAssignFnForReturn(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -2181,12 +2377,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
- CCInfo.AnalyzeCallOperands(Outs,
- CCAssignFnForNode(CalleeCC, false, isVarArg));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
@@ -2236,8 +2431,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
- return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
- isVarArg));
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
@@ -2288,8 +2482,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext(), Call);
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
- isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
@@ -2537,7 +2730,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue CPAddr;
- bool IsPositionIndependent = isPositionIndependent();
+ bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
@@ -2595,16 +2788,17 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
- SDValue FuncTLVGet =
- DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
- MachineMemOperand::MOInvariant);
+ SDValue FuncTLVGet = DAG.getLoad(
+ MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ /* Alignment = */ 4,
+ MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
- MachineFrameInfo *MFI = F.getFrameInfo();
- MFI->setAdjustsStack(true);
+ MachineFrameInfo &MFI = F.getFrameInfo();
+ MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
@@ -2801,12 +2995,171 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("bogus TLS model");
}
+/// Return true if all users of V are within function F, looking through
+/// ConstantExprs.
+static bool allUsersAreInFunction(const Value *V, const Function *F) {
+ SmallVector<const User*,4> Worklist;
+ for (auto *U : V->users())
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ auto *U = Worklist.pop_back_val();
+ if (isa<ConstantExpr>(U)) {
+ for (auto *UU : U->users())
+ Worklist.push_back(UU);
+ continue;
+ }
+
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I || I->getParent()->getParent() != F)
+ return false;
+ }
+ return true;
+}
+
+/// Return true if all users of V are within some (any) function, looking through
+/// ConstantExprs. In other words, return false if V has any global constant
+/// users, which would effectively take its address.
+static bool allUsersAreInFunctions(const Value *V) {
+ SmallVector<const User*,4> Worklist;
+ for (auto *U : V->users())
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ auto *U = Worklist.pop_back_val();
+ if (isa<ConstantExpr>(U)) {
+ for (auto *UU : U->users())
+ Worklist.push_back(UU);
+ continue;
+ }
+
+ if (!isa<Instruction>(U))
+ return false;
+ }
+ return true;
+}
+
+// Return true if T is an integer, float or an array/vector of either.
+static bool isSimpleType(Type *T) {
+ if (T->isIntegerTy() || T->isFloatingPointTy())
+ return true;
+ Type *SubT = nullptr;
+ if (T->isArrayTy())
+ SubT = T->getArrayElementType();
+ else if (T->isVectorTy())
+ SubT = T->getVectorElementType();
+ else
+ return false;
+ return SubT->isIntegerTy() || SubT->isFloatingPointTy();
+}
+
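For reference, a small sanity sketch of what this predicate accepts (a hypothetical helper, assumed to sit in this file next to isSimpleType so the usual LLVM headers are in scope):

static void isSimpleTypeExamples(LLVMContext &Ctx) {
  assert(isSimpleType(Type::getInt32Ty(Ctx)));                    // i32: yes
  assert(isSimpleType(ArrayType::get(Type::getFloatTy(Ctx), 8))); // [8 x float]: yes
  assert(!isSimpleType(Type::getInt8PtrTy(Ctx)));                 // i8*: no, may relocate
}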
+static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+ EVT PtrVT, SDLoc dl) {
+ // If we're creating a pool entry for a constant global with unnamed address,
+ // and the global is small enough, we can emit it inline into the constant pool
+ // to save ourselves an indirection.
+ //
+ // This is a win if the constant is only used in one function (so it doesn't
+ // need to be duplicated) or duplicating the constant wouldn't increase code
+ // size (implying the constant is no larger than 4 bytes).
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // We rely on this decision to inline being idempotent and unrelated to the
+ // use-site. We know that if we inline a variable at one use site, we'll
+ // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
+ // doesn't know about this optimization, so bail out if it's enabled;
+ // otherwise we could decide to inline here (and thus never emit the GV)
+ // but still require the GV from fast-isel-generated code.
+ if (!EnableConstpoolPromotion ||
+ DAG.getMachineFunction().getTarget().Options.EnableFastISel)
+ return SDValue();
+
+ auto *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar || !GVar->hasInitializer() ||
+ !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
+ !GVar->hasLocalLinkage())
+ return SDValue();
+
+ // Ensure that we don't try and inline any type that contains pointers. If
+ // we inline a value that contains relocations, we move the relocations from
+ // .data to .text which is not ideal.
+ auto *Init = GVar->getInitializer();
+ if (!isSimpleType(Init->getType()))
+ return SDValue();
+
+ // The constant islands pass can only really deal with alignment requests
+ // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
+ // any type wanting greater alignment requirements than 4 bytes. We also
+ // can only promote constants that are multiples of 4 bytes in size or
+ // are paddable to a multiple of 4. Currently we only try to pad constants
+ // that are strings for simplicity.
+ auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
+ unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
+ unsigned Align = GVar->getAlignment();
+ unsigned RequiredPadding = 4 - (Size % 4);
+ bool PaddingPossible =
+ RequiredPadding == 4 || (CDAInit && CDAInit->isString());
+ if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
+ return SDValue();
+
+ unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // We can't bloat the constant pool too much, else the ConstantIslands pass
+ // may fail to converge. If we haven't promoted this global yet (it may have
+ // multiple uses), and promoting it would increase the constant pool size
+ // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal.
+ if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
+ if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
+ ConstpoolPromotionMaxTotal)
+ return SDValue();
+
+ // This is only valid if all users are in a single function OR it has users
+ // in multiple functions but is no larger than a pointer. We also check if
+ // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
+ // address taken.
+ if (!allUsersAreInFunction(GVar, F) &&
+ !(Size <= 4 && allUsersAreInFunctions(GVar)))
+ return SDValue();
+
+ // We're going to inline this global. Pad it out if needed.
+ if (RequiredPadding != 4) {
+ StringRef S = CDAInit->getAsString();
+
+ SmallVector<uint8_t,16> V(S.size());
+ std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
+ while (RequiredPadding--)
+ V.push_back(0);
+ Init = ConstantDataArray::get(*DAG.getContext(), V);
+ }
+
+ auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
+ SDValue CPAddr =
+ DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
+ if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
+ AFI->markGlobalAsPromotedToConstantPool(GVar);
+ AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
+ PaddedSize - 4);
+ }
+ ++NumConstpoolPromoted;
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+}
+
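A worked example of the padding arithmetic above; note that RequiredPadding == 4 acts as the "already a multiple of 4" sentinel (standalone sketch, sizes hypothetical):

unsigned paddedSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4);
  return Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
}
// paddedSize(5) == 8   (e.g. "abcd" plus NUL: pad by 3)
// paddedSize(8) == 8   (RequiredPadding == 4, so no padding is added)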
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->getBaseObject();
+ bool IsRO =
+ (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+ isa<Function>(GV);
+
+ // Promote to the constant pool only when not generating an execute-only (XO) text section.
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
+ if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ return V;
+
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
@@ -2833,6 +3186,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
DAG.getLoad(PtrVT, dl, Chain, Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
+ } else if (Subtarget->isROPI() && IsRO) {
+ // PC-relative.
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
+ return Result;
+ } else if (Subtarget->isRWPI() && !IsRO) {
+ // SB-relative.
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue G = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
+ return Result;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
@@ -2854,6 +3224,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported for Darwin");
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -2880,6 +3252,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
"Windows on ARM expects to use movw/movt");
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported for Windows");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
@@ -3097,8 +3471,8 @@ SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -3139,7 +3513,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
// initialize stack frame.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
@@ -3154,7 +3528,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
ArgOffset = -4 * (ARM::R4 - RBegin);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
@@ -3200,7 +3574,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -3208,9 +3582,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), Prologue);
- CCInfo.AnalyzeFormalArguments(Ins,
- CCAssignFnForNode(CallConv, /* Return*/ false,
- isVarArg));
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
@@ -3248,7 +3620,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
CCInfo.rewindByValRegsInfo();
int lastInsIndex = -1;
- if (isVarArg && MFI->hasVAStart()) {
+ if (isVarArg && MFI.hasVAStart()) {
unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
if (RegIdx != array_lengthof(GPRArgRegs))
ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
@@ -3278,7 +3650,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
- int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
@@ -3370,8 +3742,8 @@ SDValue ARMTargetLowering::LowerFormalArguments(
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
- int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- FIOffset, true);
+ int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3385,7 +3757,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
// varargs
- if (isVarArg && MFI->hasVAStart())
+ if (isVarArg && MFI.hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
@@ -4122,15 +4494,15 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
- if (Subtarget->isThumb2()) {
- // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
+ // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
// which does another jump to the destination. This also makes it easier
- // to translate it to TBB / TBH later.
+ // to translate it to TBB / TBH later (Thumb2 only).
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
- if (isPositionIndependent()) {
+ if (isPositionIndependent() || Subtarget->isROPI()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
@@ -4320,8 +4692,8 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -4346,8 +4718,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const ARMBaseRegisterInfo &ARI =
*static_cast<const ARMBaseRegisterInfo*>(RegInfo);
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
@@ -4520,6 +4892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -4530,15 +4903,23 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
-
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
- ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
- CCR, Cmp);
+ SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+ SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
+ ARMcc, CCR, CmpLo);
+
+ SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue HiBigShift = Opc == ISD::SRA
+ ? DAG.getNode(Opc, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, VT))
+ : DAG.getConstant(0, dl, VT);
+ SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -4556,23 +4937,28 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
- CCR, Cmp);
+ SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
+ DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
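For readers tracing the two CMOV expansions above, here are the selected values restated as scalar C. This is a model only, not the DAG code (ARM register-specified shifts by 32 or more produce 0, so the DAG itself needs no guard for Amt == 0):

#include <cstdint>
// Scalar model of LowerShiftLeftParts for VTBits == 32 (0 <= Amt < 64).
void shlParts(uint32_t Lo, uint32_t Hi, unsigned Amt,
              uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt < 32) { // ExtraShAmt = Amt - 32 is negative: "small" shift
    OutHi = (Hi << Amt) | (Amt ? (Lo >> (32 - Amt)) : 0);
    OutLo = Lo << Amt;
  } else {        // ExtraShAmt >= 0: "big" shift, low word becomes zero
    OutHi = Lo << (Amt - 32);
    OutLo = 0;
  }
}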
@@ -4877,32 +5263,49 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
+ if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
+ (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
+ // Special-case integer 64-bit equality comparisons. They aren't legal,
+ // but they can be lowered with a few vector instructions.
+ unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
+ SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
+ SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
+ SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
+ DAG.getCondCode(ISD::SETEQ));
+ SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
+ SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
+ Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
+ if (SetCCOpcode == ISD::SETNE)
+ Merged = DAG.getNOT(dl, Merged, CmpVT);
+ Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
+ return Merged;
+ }
+
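A scalar restatement of the VREV64-plus-AND trick above (model only): the v4i32 SETCC yields an all-ones or all-zeros mask per 32-bit half, VREV64 swaps the halves within each 64-bit lane, and the AND is true only when both halves matched.

#include <cstdint>
bool lanesEqual(uint64_t A, uint64_t B) {
  bool LoEq = (uint32_t)A == (uint32_t)B;
  bool HiEq = (uint32_t)(A >> 32) == (uint32_t)(B >> 32);
  return LoEq && HiEq; // the SETNE case just inverts this result
}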
if (CmpVT.getVectorElementType() == MVT::i64)
- // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
- // but it's possible that our operands are 64-bit but our result is 32-bit.
- // Bail in this case.
+ // 64-bit comparisons are not legal in general.
return SDValue();
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
- case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETOLT:
- case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETOLE:
- case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE: Opc = ARMISD::VCGE; break;
- case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
- case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
- case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETONE:
// Expand this to (OLT | OGT).
TmpOp0 = Op0;
@@ -4911,7 +5314,9 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
- case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETUO:
+ Invert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO:
// Expand this to (OLT | OGE).
TmpOp0 = Op0;
@@ -5168,11 +5573,28 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->hasVFP3())
- return SDValue();
-
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ const APFloat &FPVal = CFP->getValueAPF();
+
+ // Prevent floating-point constants from using literal loads
+ // when execute-only is enabled.
+ if (ST->genExecuteOnly()) {
+ APInt INTVal = FPVal.bitcastToAPInt();
+ SDLoc DL(CFP);
+ if (IsDouble) {
+ SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
+ if (!ST->isLittle())
+ std::swap(Lo, Hi);
+ return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
+ } else {
+ return DAG.getConstant(INTVal, DL, MVT::i32);
+ }
+ }
+
+ if (!ST->hasVFP3())
+ return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU
@@ -5180,7 +5602,6 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try splatting with a VMOV.f32...
- const APFloat &FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
@@ -5325,7 +5746,7 @@ static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5376,7 +5797,7 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// want to check the low half and high half of the shuffle mask as if it were
// the other case
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5411,7 +5832,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5446,7 +5867,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
// Requires similar checks to that of isVTRNMask with
// respect the how results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5476,7 +5897,7 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5517,7 +5938,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
// Requires similar checks to that of isVTRNMask with respect the how results
// are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5550,7 +5971,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5650,6 +6071,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatUndef.isAllOnesValue())
+ return DAG.getUNDEF(VT);
+
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
EVT VmovVT;
@@ -5732,7 +6156,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
@@ -5811,6 +6235,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return shuffle;
}
+ if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
+ // If we haven't found an efficient lowering, try splitting a 128-bit vector
+ // into two 64-bit vectors; we might discover a better way to lower it.
+ SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
+ EVT ExtVT = VT.getVectorElementType();
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
+ SDValue Lower =
+ DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
+ if (Lower.getOpcode() == ISD::BUILD_VECTOR)
+ Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
+ SDValue Upper = DAG.getBuildVector(
+ HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
+ if (Upper.getOpcode() == ISD::BUILD_VECTOR)
+ Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
+ if (Lower && Upper)
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
+ }
+
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
@@ -5896,7 +6338,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -5920,7 +6362,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SmallestEltTy = SrcEltTy;
}
unsigned ResMultiplier =
- VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
@@ -6006,13 +6448,13 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
- int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
- auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
@@ -6020,7 +6462,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
- VT.getVectorElementType().getSizeInBits());
+ VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -6080,7 +6522,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool ReverseVEXT, isV_UNDEF;
unsigned Imm, WhichResult;
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
return (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
@@ -6223,7 +6665,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of the same time so that they get CSEd properly.
ArrayRef<int> ShuffleMask = SVN->getMask();
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize <= 32) {
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
@@ -6309,7 +6751,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
- assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
+ assert(all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
@@ -6397,8 +6839,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDValue Vec = Op.getOperand(0);
- if (Op.getValueType() == MVT::i32 &&
- Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
@@ -6463,7 +6904,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
@@ -6590,7 +7031,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
@@ -6915,7 +7356,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
auto PtrVT = getPointerTy(DAG.getDataLayout());
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
@@ -6929,7 +7370,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
// Create stack object for sret.
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
ArgListEntry Entry;
@@ -7029,6 +7470,19 @@ SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}
+static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(1);
+ if (N->getValueType(0) == MVT::i32)
+ return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
+ DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
+}
+
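The i64 path above relies on a simple identity, shown here as a standalone check (model only): the OR of the two halves is zero exactly when the full 64-bit denominator is zero, so a single 32-bit WIN__DBZCHK covers the i64 divide.

#include <cstdint>
bool denomIsZero(uint64_t D) {
  return ((uint32_t)D | (uint32_t)(D >> 32)) == 0;
}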
void ARMTargetLowering::ExpandDIV_Windows(
SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const {
@@ -7039,14 +7493,7 @@ void ARMTargetLowering::ExpandDIV_Windows(
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
- DAG.getConstant(1, dl, MVT::i32));
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
-
- SDValue DBZCHK =
- DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+ SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
@@ -7132,11 +7579,66 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
Results.push_back(SDValue(CmpSwap, 2));
}
+static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
+ "Custom lowering is MSVCRT specific!");
+
+ SDLoc dl(Op);
+ SDValue Val = Op.getOperand(0);
+ MVT Ty = Val->getSimpleValueType(0);
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
+ SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Node = Val;
+ Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isZExt = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Exponent;
+ Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isZExt = true;
+ Args.push_back(Entry);
+
+ Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // The in-chain to the call is the entry node. If we are emitting a
+ // tailcall, the chain will be mutated if the node has a non-entry input
+ // chain.
+ SDValue InChain = DAG.getEntryNode();
+ SDValue TCChain = InChain;
+
+ const auto *F = DAG.getMachineFunction().getFunction();
+ bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+ F->getReturnType() == LCRTy;
+ if (IsTC)
+ InChain = TCChain;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
+ .setTailCall(IsTC);
+ std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
+
+ // Return the chain (the DAG root) if it is a tail call
+ return !CI.second.getNode() ? DAG.getRoot() : CI.first;
+}
+
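The net effect of this lowering, as a scalar model (not the DAG code itself): llvm.powi becomes a pow/powf libcall with the integer exponent converted to floating point first, emitted as a tail call when the position allows.

#include <cmath>
double powiModel(double X, int N) { return std::pow(X, (double)N); } // "pow"
float powifModel(float X, int N) { return std::pow(X, (float)N); }   // "powf"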
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::ConstantPool:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
+ return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
switch (Subtarget->getTargetTriple().getObjectFormat()) {
@@ -7218,6 +7720,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -7278,6 +7781,8 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported with SjLj");
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
@@ -7396,8 +7901,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- MachineFrameInfo *MFI = MF->getFrameInfo();
- int FI = MFI->getFunctionContextIndex();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ int FI = MFI.getFunctionContextIndex();
const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
: &ARM::GPRnopcRegClass;
@@ -7406,7 +7911,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
- MachineModuleInfo &MMI = MF->getMMI();
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
if (!BB->isEHPad()) continue;
@@ -7418,9 +7922,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
if (!II->isEHLabel()) continue;
MCSymbol *Sym = II->getOperand(0).getMCSymbol();
- if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+ if (!MF->hasCallSiteLandingPad(Sym)) continue;
- SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
@@ -7491,8 +7995,10 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
- // registers being marked as clobbered.
- MIB.addRegMask(RI.getNoPreservedMask());
+ // registers being marked as clobbered. This can't work if the dispatch block
+ // is in a Thumb1 function and is linked with ARM code which uses the FP
+ // registers, as there is no way to preserve the FP registers in Thumb1 mode.
+ MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
@@ -7911,6 +8417,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
+ bool IsThumb = Subtarget->isThumb();
if (Align & 1) {
UnitSize = 1;
@@ -7932,7 +8439,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+ TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
: UnitSize == 8 ? &ARM::DPRRegClass
@@ -8014,12 +8521,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
varEnd)
.addReg(Vtmp)
.addImm(LoopSize >> 16));
@@ -8034,7 +8541,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- if (IsThumb1)
+ if (IsThumb)
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
varEnd, RegState::Define).addConstantPoolIndex(Idx));
else
@@ -8201,17 +8708,20 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(ContBB);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
MF->push_back(TrapBB);
- BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
MBB->addSuccessor(TrapBB);
- BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
- .addReg(MI.getOperand(0).getReg())
- .addMBB(TrapBB);
- AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB));
- MBB->addSuccessor(ContBB);
+ AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0));
+ BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR);
MI.eraseFromParent();
return ContBB;
@@ -8635,7 +9145,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
// (zext cc) can never be the all ones value.
if (AllOnes)
return false;
- // Fall through.
+ LLVM_FALLTHROUGH;
case ISD::SIGN_EXTEND: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8962,7 +9472,8 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
// be combined into a UMLAL. The other pattern is AddcNode being combined
// into an UMLAL and then using another addc is handled in ISelDAGToDAG.
- if (!Subtarget->hasV6Ops())
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
+ (Subtarget->isThumb() && !Subtarget->hasThumb2()))
return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
SDNode *PrevAddc = nullptr;
@@ -9964,6 +10475,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
isLaneOp = true;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
@@ -10078,8 +10590,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
}
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys,
- Ops, AlignedVecTy,
+ EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
MemN->getMemOperand());
// Update the uses.
@@ -10211,19 +10723,44 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return SDValue();
// Make sure the VMOV element size is not bigger than the VDUPLANE elements.
- unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ unsigned EltSize = Op.getScalarValueSizeInBits();
// The canonical VMOV for a zero vector uses a 32-bit element size.
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned EltBits;
if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
EltSize = 8;
EVT VT = N->getValueType(0);
- if (EltSize > VT.getVectorElementType().getSizeInBits())
+ if (EltSize > VT.getScalarSizeInBits())
return SDValue();
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
+static SDValue PerformVDUPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ // Match VDUP(LOAD) -> VLD1DUP.
+ // We match this pattern here rather than waiting for isel because the
+ // transform is only legal for unindexed loads.
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
+ if (LD && Op.hasOneUse() && LD->isUnindexed() &&
+ LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
+ SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
+ DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
+ SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
+ Ops, LD->getMemoryVT(),
+ LD->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
+ return VLDDup;
+ }
+
+ return SDValue();
+}
+
static SDValue PerformLOADCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
@@ -10255,8 +10792,8 @@ static SDValue PerformSTORECombine(SDNode *N,
EVT StVT = St->getMemoryVT();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+ unsigned FromEltSz = VT.getScalarSizeInBits();
+ unsigned ToEltSz = StVT.getScalarSizeInBits();
// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
@@ -10524,7 +11061,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
@@ -10539,7 +11076,7 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
if (!isIntrinsic)
@@ -11051,6 +11588,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
@@ -11066,6 +11604,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
+ case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
@@ -11234,6 +11773,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
return true;
}
+int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ if (isLegalAddressingMode(DL, AM, Ty, AS)) {
+ if (Subtarget->hasFPAO())
+ return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
+ return 0;
+ }
+ return -1;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
@@ -11384,7 +11934,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
case 1:
if (Subtarget->isThumb1Only())
return false;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
default:
// ARM doesn't support any R+R*scale+imm addr modes.
if (AM.BaseOffs)
@@ -11682,7 +12232,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
- unsigned MemBits = VT.getScalarType().getSizeInBits();
+ unsigned MemBits = VT.getScalarSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
@@ -12043,7 +12593,7 @@ static RTLIB::Libcall getDivRemLibcall(
}
static TargetLowering::ArgListTy getDivRemArgList(
- const SDNode *N, LLVMContext *Context) {
+ const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemArgList");
@@ -12060,12 +12610,15 @@ static TargetLowering::ArgListTy getDivRemArgList(
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
+ if (Subtarget->isTargetWindows() && Args.size() >= 2)
+ std::swap(Args[0], Args[1]);
return Args;
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) &&
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
+ Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
@@ -12073,20 +12626,42 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ SDLoc dl(Op);
+
+ // If the target has hardware divide, use divide + multiply + subtract:
+ // div = a / b
+ // rem = a - b * div
+ // return {div, rem}
+ // This should be lowered into UDIV/SDIV + MLS later on.
+ if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ Op->getSimpleValueType(0) == MVT::i32) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ const SDValue Dividend = Op->getOperand(0);
+ const SDValue Divisor = Op->getOperand(1);
+ SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
+ SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
+
+ SDValue Values[2] = {Div, Rem};
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
+ }
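The expansion above restated as scalar arithmetic (model only):

// One SDIV/UDIV plus a multiply-subtract recovers the remainder without a
// libcall; the MUL + SUB pair is expected to fold into MLS later.
void divremModel(int A, int B, int &Div, int &Rem) {
  Div = A / B;
  Rem = A - B * Div;
}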
RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
VT.getSimpleVT().SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
- DAG.getContext());
+ DAG.getContext(),
+ Subtarget);
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
- SDLoc dl(Op);
+ if (Subtarget->isTargetWindows())
+ InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
@@ -12119,11 +12694,15 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
SimpleTy);
SDValue InChain = DAG.getEntryNode();
- TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
+ Subtarget);
bool isSigned = N->getOpcode() == ISD::SREM;
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
+ if (Subtarget->isTargetWindows())
+ InChain = WinDBZCheckDenominator(DAG, N, InChain);
+
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
@@ -12342,6 +12921,14 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
+ unsigned Index) const {
+ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+ return false;
+
+ return (Index == 0 || Index == ResVT.getVectorNumElements());
+}
+
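Concretely, for ResVT = v2i32 extracted from a v4i32 source (a hypothetical helper to illustrate the rule): Index 0 is the low D subregister and Index 2 == ResVT.getVectorNumElements() is the high one; Index 1 would straddle both halves.

bool cheapV2I32FromV4I32(unsigned Index) {
  return Index == 0 || Index == 2; // only the two D-register halves are free
}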
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -12443,7 +13030,8 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
? AtomicExpansionKind::LLSC
: AtomicExpansionKind::None;
}
@@ -12455,7 +13043,9 @@ bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- return getTargetMachine().getOptLevel() != 0;
+ bool hasAtomicCmpXchg =
+ !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
}
bool ARMTargetLowering::shouldInsertFencesForAtomic(
@@ -12681,6 +13271,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vst3 instruction in CodeGen.
+///
+/// Example for a more general valid mask (Factor 3). Lower:
+/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
+/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+///  %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
+///  %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
+///  %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
+///  call void @llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
@@ -12691,9 +13292,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(VecTy->getVectorNumElements() % Factor == 0 &&
"Invalid interleaved store");
- unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
Type *EltTy = VecTy->getVectorElementType();
- VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+ VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
@@ -12720,7 +13321,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
- SubVecTy = VectorType::get(IntTy, NumSubElts);
+ SubVecTy = VectorType::get(IntTy, LaneLen);
}
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
@@ -12736,9 +13337,28 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SI->getModule(), StoreInts[Factor - 2], Tys);
// Split the shufflevector operands into sub vectors for the new vstN call.
- for (unsigned i = 0; i < Factor; i++)
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+ auto Mask = SVI->getShuffleMask();
+ for (unsigned i = 0; i < Factor; i++) {
+ if (Mask[i] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ if (Mask[j*Factor + i] >= 0) {
+ StartMask = Mask[j*Factor + i] - j;
+ break;
+ }
+ }
+      // Note: If all elements in a chunk are undef, StartMask stays 0.
+      // Filling undef gaps with arbitrary elements is fine, since those
+      // elements were going to be written anyway (as undefs); in the
+      // all-undef case we default to taking elements from index 0.
+      // StartMask cannot be negative; that is checked in isReInterleaveMask.
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
+ }
+ }
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
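[Editor's note] As a standalone illustration of the undef handling just added (plain C++; Mask is the flat shufflevector mask with -1 encoding undef, exactly as getShuffleMask returns it):

    #include <vector>

    // For lane `Lane` of an interleaved store with `Factor` lanes of
    // `LaneLen` elements each, recover the sequential start index even
    // when the lane's first mask element is undef.
    unsigned recoverStartMask(const std::vector<int> &Mask, unsigned Factor,
                              unsigned LaneLen, unsigned Lane) {
      if (Mask[Lane] >= 0)
        return Mask[Lane];                    // first element is defined
      for (unsigned j = 1; j < LaneLen; ++j)
        if (Mask[j * Factor + Lane] >= 0)
          return Mask[j * Factor + Lane] - j; // back-compute the start
      return 0;                               // all-undef lane: default to 0
    }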
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 4906686616bc..5255d82d647a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -190,7 +190,8 @@ namespace llvm {
MEMCPY,
// Vector load N-element structure to all lanes:
- VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD2DUP,
VLD3DUP,
VLD4DUP,
@@ -202,6 +203,7 @@ namespace llvm {
VLD2LN_UPD,
VLD3LN_UPD,
VLD4LN_UPD,
+ VLD1DUP_UPD,
VLD2DUP_UPD,
VLD3DUP_UPD,
VLD4DUP_UPD,
@@ -291,6 +293,14 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+
+    /// getScalingFactorCost - Return the cost of the scaling used in the
+    /// addressing mode represented by AM.
+    /// If the AM is supported, the return value must be >= 0.
+    /// If the AM is not supported, the return value must be negative.
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
+
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -421,6 +431,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+ /// with this index.
+ bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
+
/// \brief Returns true if an argument of type Ty needs to be passed in a
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
@@ -482,6 +496,9 @@ namespace llvm {
return HasStandaloneRem;
}
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -506,6 +523,8 @@ namespace llvm {
bool HasStandaloneRem = true;
+ void InitLibcallCallingConvs();
+
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 37a83f70a1fb..488439fc24e0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -398,6 +398,14 @@ class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<Predicate> Predicates = [IsThumb];
}
+// PseudoInst that's in ARMv8-M Baseline (somewhere between Thumb and Thumb2)
+class t2basePseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb,HasV8MBaseline];
+}
+
// PseudoInst that's Thumb2-mode only.
class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<dag> pattern>
@@ -999,6 +1007,15 @@ class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
}
+class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasDSP];
+}
+class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
+}
+class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 98b1b4ca4272..27b64322dfa9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -123,7 +123,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index c9735f3ec277..c47393990e97 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -330,6 +330,8 @@ def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -358,7 +360,23 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+ if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
+ return true;
+
+ if (N->getOpcode() != ISD::SRA)
+ return false;
+ if (N->getOperand(0).getOpcode() != ISD::SHL)
+ return false;
+
+ auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ return true;
}]>;
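[Editor's note] The predicate now also accepts the explicit (sra (shl x, 16), 16) idiom even when ComputeNumSignBits cannot prove 17 sign bits. A toy model of the check (simplified node type, not the SelectionDAG API):

    // SRA/SHL mirror the opcodes matched above.
    enum Opcode { SRA, SHL, OTHER };
    struct Node {
      Opcode Opc;
      unsigned ShiftAmount;  // constant shift operand, if any
      const Node *Operand0;
    };

    // True if N is known to be a sign-extended 16-bit value.
    bool isSext16(const Node *N, unsigned NumSignBits) {
      if (NumSignBits >= 17)   // top 17 bits are copies of bit 15
        return true;
      if (N->Opc != SRA || N->ShiftAmount != 16)
        return false;
      const Node *Shl = N->Operand0;
      return Shl && Shl->Opc == SHL && Shl->ShiftAmount == 16;
    }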
/// Split a 32-bit immediate into two 16 bit parts.
@@ -3400,6 +3418,12 @@ def SXTAB : AI_exta_rrot<0b01101010,
def SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
+ i16)),
+ (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
@@ -3427,6 +3451,11 @@ def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+
+def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+ (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+ (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
@@ -3471,6 +3500,7 @@ def UBFX : I<(outs GPRnopc:$Rd),
// Arithmetic Instructions.
//
+let isAdd = 1 in
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
@@ -3486,9 +3516,11 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
+let isAdd = 1 in
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+let isAdd = 1 in
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
@@ -5492,45 +5524,22 @@ def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULBB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
(SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
(SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
-
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index defef4ea9073..b5fa8e999e2a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -610,14 +610,14 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
def VLDMQIA
: PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
: PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+ [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -6849,6 +6849,16 @@ let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}
+// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
(f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 93a174f3678a..a681f64b05e6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -904,49 +904,51 @@ class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
let Inst{7-0} = imm8;
}
-// Add with carry register
-let isCommutable = 1, Uses = [CPSR] in
-def tADC : // A8.6.2
- T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
- "adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-
-// Add immediate
-def tADDi3 : // A8.6.4 T1
- T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
- IIC_iALUi,
- "add", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
- Sched<[WriteALU]> {
- bits<3> imm3;
- let Inst{8-6} = imm3;
-}
-
-def tADDi8 : // A8.6.4 T2
- T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
- (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
- "add", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
- Sched<[WriteALU]>;
+let isAdd = 1 in {
+ // Add with carry register
+ let isCommutable = 1, Uses = [CPSR] in
+ def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+ // Add immediate
+ def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
+ Sched<[WriteALU]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+ }
-// Add register
-let isCommutable = 1 in
-def tADDrr : // A8.6.6 T1
- T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
- IIC_iALUr,
- "add", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-
-let hasSideEffects = 0 in
-def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
- "add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
- // A8.6.6 T2
- bits<4> Rdn;
- bits<4> Rm;
- let Inst{7} = Rdn{3};
- let Inst{6-3} = Rm;
- let Inst{2-0} = Rdn{2-0};
+ def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ Sched<[WriteALU]>;
+
+ // Add register
+ let isCommutable = 1 in
+ def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+ let hasSideEffects = 0 in
+ def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+ }
}
// AND register
@@ -1259,6 +1261,13 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
let Inst{7-0} = imm8;
}
+def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0",
+ [(int_arm_undefined 249)]>, Encoding16,
+ Requires<[IsThumb, IsWindows]> {
+ let Inst = 0xdef9;
+ let isTerminator = 1;
+}
+
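[Editor's note] t__brkdiv0 is the trap that WinDBZCheckDenominator (see the LowerDivRem/LowerREM hunks earlier) branches to on a zero divisor. Conceptually, with __brkdiv0 standing in for the permanently undefined encoding 0xdef9 that Windows maps to an integer-divide-by-zero exception:

    #include <cstdint>

    extern "C" void __brkdiv0(); // stand-in for the 0xdef9 trap

    int32_t checked_sdiv(int32_t Num, int32_t Den) {
      if (Den == 0)
        __brkdiv0();    // raise the divide-by-zero exception
      return Num / Den;
    }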
// Zero-extend byte
def tUXTB : // A8.6.262
T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1306,6 +1315,18 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
+// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
+// and make use of the same compressed jump table format as Thumb-2.
+let Size = 2 in {
+def tTBB_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def tTBH_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+}
+
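[Editor's note] The table these pseudos index uses the same compressed format as Thumb-2 TBB/TBH: unsigned byte (or halfword) offsets, scaled by 2 because Thumb instructions are halfword-aligned. A sketch of the dispatch arithmetic (roles of base and index as assumed here, not the exact pseudo expansion):

    #include <cstdint>

    uint32_t tbbTarget(uint32_t Base, const uint8_t *Table, uint32_t Index) {
      return Base + 2 * uint32_t(Table[Index]); // byte offsets, TBB
    }
    uint32_t tbhTarget(uint32_t Base, const uint16_t *Table, uint32_t Index) {
      return Base + 2 * uint32_t(Table[Index]); // halfword offsets, TBH
    }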
//===----------------------------------------------------------------------===//
// TLS Instructions
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index db8b9fb923bf..603d66403e65 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -536,9 +536,9 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
}
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ string opc, list<dag> pattern>
+ : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -552,10 +552,11 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
let Inst{7-4} = opc7_4;
let Inst{3-0} = Rm;
}
-class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
+ : T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -1983,12 +1984,19 @@ def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// A simple right-shift can also be used in most cases (the exception is the
// SXTH operations with a rotate of 24: there the non-contiguous bits are
// relevant).
-def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
- (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
-def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
- (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (rotr rGPR:$Rm, (i32 24)), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (or (srl rGPR:$Rm, (i32 24)),
+ (shl rGPR:$Rm, (i32 8))), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
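[Editor's note] The two extra SXTAH patterns are alternative spellings of a rotate by 24: rotr(x, 24) == (x >> 24) | (x << 8), and sign-extending bits [15:0] of that rotation is exactly SXTAH with ROR #24 (rot encoding 3, i.e. 24/8). A quick arithmetic check:

    #include <cstdint>

    uint32_t rotr24(uint32_t X) { return (X >> 24) | (X << 8); }

    // sxtah Rd, Rn, Rm, ror #24: add the sign-extended low halfword of
    // the rotated Rm. The low 16 bits of rotr24(Rm) are Rm[31:24] and
    // Rm[7:0], i.e. non-contiguous, which is why a plain shift can't
    // always model it.
    int32_t sxtahRor24(int32_t Rn, uint32_t Rm) {
      return Rn + int32_t(int16_t(rotr24(Rm)));
    }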
// Zero extenders
@@ -2017,12 +2025,12 @@ def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
-def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
- (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
-def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
- (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
+ 0xFF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
+ 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2030,6 +2038,7 @@ def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
// Arithmetic Instructions.
//
+let isAdd = 1 in
defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>;
defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
@@ -2546,367 +2555,194 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
- Requires<[IsThumb2, UseMulOps]> {
+class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
- let Inst{7-4} = 0b0000; // Multiply
+ let Inst{7-4} = op7_4;
}
-def t2MLS: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b000;
- let Inst{7-4} = 0b0001; // Multiply and Subtract
-}
+def t2MLA : T2FourRegMLA<0b0000, "mla",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm),
+ rGPR:$Ra))]>;
+def t2MLS: T2FourRegMLA<0b0001, "mls",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn,
+ rGPR:$Rm)))]>;
// Extra precision multiplies with low / high results
let hasSideEffects = 0 in {
let isCommutable = 1 in {
-def t2SMULL : T2MulLong<0b000, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
-
-def t2UMULL : T2MulLong<0b010, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>;
+def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>;
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2MlaLong<0b100, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-
-def t2UMLAL : T2MlaLong<0b110, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-
-def t2UMAAL : T2MulLong<0b110, 0b0110,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
- Requires<[IsThumb2, HasDSP]>;
+def t2SMLAL : T2MlaLong<0b100, 0b0000, "smlal">;
+def t2UMLAL : T2MlaLong<0b110, 0b0000, "umlal">;
+def t2UMAAL : T2MlaLong<0b110, 0b0110, "umaal">, Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
// Rounding variants of the below included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
- "smmul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
- "smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ opc, "\t$Rd, $Rn, $Rm", pattern>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+ let Inst{7-4} = op7_4;
}
+def t2SMMUL : T2SMMUL<0b0000, "smmul", [(set rGPR:$Rd, (mulhs rGPR:$Rn,
+ rGPR:$Rm))]>;
+def t2SMMULR : T2SMMUL<0b0001, "smmulr", []>;
-def t2SMMLA : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMLAR: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
-}
-
-def t2SMMLS: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMLSR:T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
}
-multiclass T2I_smul<string opc, SDNode opnode> {
- def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-
- def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b10;
- }
+def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>;
+def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>;
+def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>;
+def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>;
- def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b11;
- }
-
- def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsThumb2, HasDSP]> {
+class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
+ list<dag> pattern>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc,
+ "\t$Rd, $Rn, $Rm", pattern>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
+ let Inst{22-20} = op22_20;
let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-}
-
-
-multiclass T2I_smla<string opc, SDNode opnode> {
- def BB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def BT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-
- def TB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{5-4} = op5_4;
+}
+
+def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
+ [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>;
+def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
+ [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>;
+def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
+ [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>;
+def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
+ [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+
+def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
+ (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+ (t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
+class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
+ list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
+ let Inst{22-20} = op22_20;
let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b10;
- }
-
- def TT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b11;
- }
-
- def WB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def WT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-}
-
-defm t2SMUL : T2I_smul<"smul", mul>;
-defm t2SMLA : T2I_smla<"smla", mul>;
+ let Inst{5-4} = op5_4;
+}
+
+def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (mul (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>;
+def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>;
+def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>;
+def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+ (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+ (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+
+class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
+ : T2FourReg_mac<1, op22_20, op7_4,
+ (outs rGPR:$Ra, rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
// Halfword multiply accumulate long: SMLAL<x><y>
-def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-
-// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-def t2SMUAD: T2ThreeReg_mac<
- 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMUADX:T2ThreeReg_mac<
- 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
+def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
+def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
+def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
+
+class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2ThreeReg_mac<0, op22_20, op7_4,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD: T2ThreeReg_mac<
- 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMUSDX:T2ThreeReg_mac<
- 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMLAD : T2FourReg_mac<
- 0, 0b010, 0b0000, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLADX : T2FourReg_mac<
- 0, 0b010, 0b0001, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+
+// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">;
+def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">;
+def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">;
+def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">;
+
+class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2FourReg_mac<0, op22_20, op7_4,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra),
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">;
+def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">;
+def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">;
+def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">;
+
+class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2FourReg_mac<1, op22_20, op7_4,
+ (outs rGPR:$Ra, rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">;
+def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">;
+def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">;
+def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
//===----------------------------------------------------------------------===//
// Division Instructions.
@@ -3545,7 +3381,9 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br,
}
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
-def t2BR_JT : t2PseudoInst<(outs),
+
+// Available in both v8-M.Baseline and Thumb2 targets
+def t2BR_JT : t2basePseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt),
0, IIC_Br,
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
@@ -3645,6 +3483,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
bits<4> func;
@@ -3753,6 +3592,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
+let isCall = 1, Uses = [SP] in
def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
[]>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
@@ -3809,6 +3649,7 @@ def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
// Return From Exception is a system instruction.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern>,
@@ -4568,7 +4409,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
def : t2InstAlias<"ldr${p} $Rt, $addr",
- (t2LDRpci GPRnopc:$Rt, t2ldrlabel:$addr, pred:$p)>;
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrb${p} $Rt, $addr",
(t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p} $Rt, $addr",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e29d265ae3d1..e99048645685 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -624,7 +624,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +641,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -838,7 +838,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
}
}
-defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
+defm VCVTA : vcvt_inst<"a", 0b00, fround>;
defm VCVTN : vcvt_inst<"n", 0b01>;
defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
@@ -938,7 +938,7 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>;
+defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
defm VRINTN : vrint_inst_anpm<"n", 0b01>;
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
new file mode 100644
index 000000000000..2bdbe4fca3de
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -0,0 +1,109 @@
+//===- ARMInstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstructionSelector.h"
+#include "ARMRegisterBankInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "arm-isel"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+
+static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ unsigned DstReg = I.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return true;
+
+ const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
+ (void)RegBank;
+ assert(RegBank && "Can't get reg bank for virtual register");
+
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ (void)DstSize;
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ (void)SrcSize;
+ assert((DstSize == SrcSize ||
+ // Copies are a means to setup initial types, the number of
+ // bits may not exactly match.
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ DstSize <= SrcSize)) &&
+ "Copy with different width?!");
+
+ assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank");
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or its defs.
+ // Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ return true;
+}
+
+bool ARMInstructionSelector::select(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ auto &MBB = *I.getParent();
+ auto &MF = *MBB.getParent();
+ auto &MRI = MF.getRegInfo();
+
+ if (!isPreISelGenericOpcode(I.getOpcode())) {
+ if (I.isCopy())
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ return true;
+ }
+
+ MachineInstrBuilder MIB{MF, I};
+
+ using namespace TargetOpcode;
+ switch (I.getOpcode()) {
+ case G_ADD:
+ I.setDesc(TII.get(ARM::ADDrr));
+ AddDefaultCC(AddDefaultPred(MIB));
+ break;
+ case G_FRAME_INDEX:
+ // Add 0 to the given frame index and hope it will eventually be folded into
+ // the user(s).
+ I.setDesc(TII.get(ARM::ADDri));
+ AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
+ break;
+ case G_LOAD:
+ I.setDesc(TII.get(ARM::LDRi12));
+ AddDefaultPred(MIB.addImm(0));
+ break;
+ default:
+ return false;
+ }
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
new file mode 100644
index 000000000000..5072cdd60ce4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
@@ -0,0 +1,39 @@
+//===- ARMInstructionSelector ------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for ARM.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+
+namespace llvm {
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMBaseTargetMachine;
+class ARMRegisterBankInfo;
+class ARMSubtarget;
+
+class ARMInstructionSelector : public InstructionSelector {
+public:
+ ARMInstructionSelector(const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI);
+
+ virtual bool select(MachineInstr &I) const override;
+
+private:
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMRegisterBankInfo &RBI;
+};
+
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
new file mode 100644
index 000000000000..255ea4bc7198
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -0,0 +1,44 @@
+//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMLegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+ARMLegalizerInfo::ARMLegalizerInfo() {
+ using namespace TargetOpcode;
+
+ const LLT p0 = LLT::pointer(0, 32);
+
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ setAction({G_LOAD, s32}, Legal);
+ setAction({G_LOAD, 1, p0}, Legal);
+
+ for (auto Ty : {s8, s16, s32})
+ setAction({G_ADD, Ty}, Legal);
+
+ computeTables();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
new file mode 100644
index 000000000000..ca3eea81271b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -0,0 +1,29 @@
+//===- ARMLegalizerInfo ------------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the legalization rules for the ARM target.
+class ARMLegalizerInfo : public LegalizerInfo {
+public:
+ ARMLegalizerInfo();
+};
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 62d57f3f4986..48ab491b5be9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -95,12 +95,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return ARM_LOAD_STORE_OPT_NAME;
- }
+ StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
private:
/// A set of load/store MachineInstrs with same base register sorted by
@@ -562,7 +560,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before) {
// Initialize if we never queried in this block.
if (!LiveRegsValid) {
- LiveRegs.init(TRI);
+ LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(MBB);
LiveRegPos = MBB.end();
LiveRegsValid = true;
@@ -834,7 +832,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
assert(MO.isImplicit());
unsigned DefReg = MO.getReg();
- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ if (is_contained(ImpDefs, DefReg))
continue;
// We can ignore cases where the super-reg is read and written.
if (MI->readsRegister(DefReg))
@@ -1851,7 +1849,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- if (MBBI != MBB.begin() &&
+ if (MBBI != MBB.begin() && MBBI != MBB.end() &&
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
@@ -1953,7 +1951,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 7429acdb09ad..293a527b09e8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -21,6 +21,12 @@
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -85,6 +91,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
break;
case MachineOperand::MO_ConstantPoolIndex:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
break;
case MachineOperand::MO_BlockAddress:
@@ -93,7 +101,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_FPImmediate: {
APFloat Val = MO.getFPImm()->getValueAPF();
bool ignored;
- Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored);
MCOp = MCOperand::createFPImm(Val.convertToDouble());
break;
}
@@ -150,3 +158,106 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
}
}
}
+
+void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+ if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>()
+ ->isThumbFunction())
+ {
+ MI.emitError("An attempt to perform XRay instrumentation for a"
+ " Thumb function (not supported). Detected when emitting a sled.");
+ return;
+ }
+ static const int8_t NoopsInSledCount = 6;
+ // We want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B #20
+ // ; 6 NOP instructions (24 bytes)
+ // .tmpN
+ //
+ // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
+ // over the full 28 bytes (7 instructions) with the following pattern:
+ //
+ // PUSH{ r0, lr }
+ // MOVW r0, #<lower 16 bits of function ID>
+ // MOVT r0, #<higher 16 bits of function ID>
+ // MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
+ // MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
+ // BLX ip
+ // POP{ r0, lr }
+ //
+ OutStreamer->EmitCodeAlignment(4);
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Emit "B #20" instruction, which jumps over the next 24 bytes (because
+ // register pc is 8 bytes ahead of the jump instruction by the moment CPU
+ // is executing it).
+ // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
+ // It is not clear why |addReg(0)| is needed (the last operand).
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
+ .addImm(ARMCC::AL).addReg(0));
+
+ MCInst Noop;
+ Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+  for (int8_t I = 0; I < NoopsInSledCount; I++)
+    OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, Kind);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::TAIL_CALL);
+}
+
+void ARMAsmPrinter::EmitXRayTable() {
+ if (Sleds.empty())
+ return;
+
+ MCSection *Section = nullptr;
+ if (Subtarget->isTargetELF()) {
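+    // SHF_GROUP with the function symbol as the group signature ties the
+    // entries to their function, so the linker can discard them together.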
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP |
+ ELF::SHF_MERGE,
+ 0, CurrentFnSym->getName());
+ } else if (Subtarget->isTargetMachO()) {
+ Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ SectionKind::getReadOnlyWithRel());
+ } else {
+ llvm_unreachable("Unsupported target");
+ }
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ OutStreamer->SwitchSection(Section);
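+  // Each sled entry is 16 bytes: sled address (4), function address (4),
+  // sled kind (1), always-instrument flag (1) and 6 bytes of zero padding.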
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, 4);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(6);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+
+ Sleds.clear();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index b6dee9ff8385..50d8f0941460 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -11,14 +11,14 @@
using namespace llvm;
-void ARMFunctionInfo::anchor() { }
+void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
- RestoreSPFromFP(false), LRSpilledForFarJump(false),
+ StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0),
+ HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- ArgumentStackSize(0), IsSplitCSR(false) {}
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0),
+ IsSplitCSR(false), PromotedGlobalsIncrease(0) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index f71497240ff3..8c485e89bf54 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -121,6 +121,12 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// copies.
bool IsSplitCSR;
+ /// Globals that have had their storage promoted into the constant pool.
+ SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+
+  /// The amount the literal pool has been increased by due to promoted globals.
+ int PromotedGlobalsIncrease;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -131,7 +137,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false),
+ PromotedGlobalsIncrease(0) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -226,6 +233,22 @@ public:
}
return It;
}
+
+ /// Indicate to the backend that \c GV has had its storage changed to inside
+ /// a constant pool. This means it no longer needs to be emitted as a
+ /// global variable.
+ void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV) {
+ PromotedGlobals.insert(GV);
+ }
+ SmallPtrSet<const GlobalVariable*, 2>& getGlobalsPromotedToConstantPool() {
+ return PromotedGlobals;
+ }
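+
+  /// Return the number of bytes by which promoted globals have grown the
+  /// constant pool (used, e.g., by the constant island pass when sizing
+  /// basic blocks).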
+ int getPromotedConstpoolIncrease() const {
+ return PromotedGlobalsIncrease;
+ }
+ void setPromotedConstpoolIncrease(int Sz) {
+ PromotedGlobalsIncrease = Sz;
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 73dcb9641b61..581d5fe159fd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -29,12 +29,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return "optimise barriers pass";
- }
+ StringRef getPassName() const override { return "optimise barriers pass"; }
};
char ARMOptimizeBarriersPass::ID = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
new file mode 100644
index 000000000000..9bd036a1eace
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -0,0 +1,127 @@
+//===- ARMRegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMRegisterBankInfo.h"
+#include "ARMInstrInfo.h" // For the register classes
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+// FIXME: TableGen this.
+// If it grows too much and TableGen still isn't ready to do the job, extract it
+// into an ARMGenRegisterBankInfo.def (similar to AArch64).
+namespace llvm {
+namespace ARM {
+RegisterBank GPRRegBank;
+RegisterBank *RegBanks[] = {&GPRRegBank};
+
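+// A single partial mapping: bits [0, 32) of a value live in the GPR bank.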
+RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
+
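+// One ValueMapping per operand: three identical GPR mappings are enough for
+// any fully-GPR instruction with up to three register operands (e.g. G_ADD).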
+RegisterBankInfo::ValueMapping ValueMappings[] = {
+ {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}};
+} // end namespace ARM
+} // end namespace llvm
+
+ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) {
+ static bool AlreadyInit = false;
+ // We have only one set of register banks, whatever the subtarget
+ // is. Therefore, the initialization of the RegBanks table should be
+ // done only once. Indeed the table of all register banks
+ // (ARM::RegBanks) is unique in the compiler. At some point, it
+ // will get tablegen'ed and the whole constructor becomes empty.
+ if (AlreadyInit)
+ return;
+ AlreadyInit = true;
+
+ // Initialize the GPR bank.
+ createRegisterBank(ARM::GPRRegBankID, "GPRB");
+
+ addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRRegClassID, TRI);
+ addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRwithAPSRRegClassID, TRI);
+ const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID);
+ (void)RBGPR;
+ assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnopcRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::rGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+}
+
+const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+ using namespace ARM;
+
+ switch (RC.getID()) {
+ case GPRRegClassID:
+ case tGPR_and_tcGPRRegClassID:
+ return getRegBank(ARM::GPRRegBankID);
+ default:
+ llvm_unreachable("Unsupported register kind");
+ }
+
+ llvm_unreachable("Switch should handle all register classes");
+}
+
+RegisterBankInfo::InstructionMapping
+ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ auto Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc)) {
+ InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ using namespace TargetOpcode;
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *OperandsMapping = &ARM::ValueMappings[0];
+
+ switch (Opc) {
+ case G_ADD:
+ case G_LOAD:
+ // FIXME: We're abusing the fact that everything lives in a GPR for now; in
+ // the real world we would use different mappings.
+ OperandsMapping = &ARM::ValueMappings[0];
+ break;
+ case G_FRAME_INDEX:
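+    // Only the def operand is a register; the frame index operand gets no
+    // register bank mapping (nullptr).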
+ OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr});
+ break;
+ default:
+ return InstructionMapping{};
+ }
+
+ return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ NumOperands};
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
new file mode 100644
index 000000000000..773920ee57a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -0,0 +1,41 @@
+//===- ARMRegisterBankInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+namespace ARM {
+enum {
+ GPRRegBankID = 0, // General purpose registers
+ NumRegisterBanks,
+};
+} // end namespace ARM
+
+/// This class provides the information for the target register banks.
+class ARMRegisterBankInfo final : public RegisterBankInfo {
+public:
+ ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
index 47a99313025c..b7d2d34614df 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -364,3 +364,4 @@ include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
+include "ARMScheduleR52.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
new file mode 100644
index 000000000000..1b40742a093b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -0,0 +1,983 @@
+//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
+//
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// The Cortex-R52 is an in-order, superscalar, pipelined microprocessor with
+// an 8-stage pipeline. It can issue at most two instructions per cycle.
+// There are two ALUs, one LDST unit, one MUL unit and a non-pipelined integer
+// DIV unit. A number of forwarding paths allow the results of computations to
+// feed subsequent operations before they are written back to registers.
+// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
+
+def CortexR52Model : SchedMachineModel {
+  let MicroOpBufferSize = 0; // R52 is an in-order processor
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle
+ let LoadLatency = 1; // Optimistic, assuming no misses
+ let MispredictPenalty = 8; // A branch direction mispredict, including PFU
+ let PostRAScheduler = 1; // Enable PostRA scheduler pass.
+  let CompleteModel = 0;     // Covers instructions applicable to Cortex-R52.
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Model each pipeline as a ProcResource with BufferSize = 0, since
+// Cortex-R52 is an in-order processor.
+
+def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
+def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL
+def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
+
+// Cortex-R52 specific SchedReads
+def R52Read_ISS : SchedRead;
+def R52Read_EX1 : SchedRead;
+def R52Read_EX2 : SchedRead;
+def R52Read_WRI : SchedRead;
+def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
+def R52Read_F1 : SchedRead;
+def R52Read_F2 : SchedRead;
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which map ProcResources and set latency.
+
+let SchedModel = CortexR52Model in {
+
+// ALU - Write occurs in Late EX2 (independent of whether shift was required)
+def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
+
+// Compares
+def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
+def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
+def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
+
+// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
+def : WriteRes<WriteDiv, [R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // not pipelined
+}
+
+// Loads
+def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
+def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
+
+// Branches - LR written in Late EX2
+def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
+def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
+def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
+
+// Misc
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+
+def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
+def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedReadWrites.
+
+// Forwarding information - based on when an operand is read
+def : ReadAdvance<R52Read_ISS, 0>;
+def : ReadAdvance<R52Read_EX1, 1>;
+def : ReadAdvance<R52Read_EX2, 2>;
+def : ReadAdvance<R52Read_F0, 0>;
+def : ReadAdvance<R52Read_F1, 1>;
+def : ReadAdvance<R52Read_F2, 2>;
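+// Example: a WriteALU result (Latency = 3) feeding an operand read in EX2
+// (ReadAdvance = 2) has an effective operand latency of 3 - 2 = 1 cycle.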
+
+
+// Cortex-R52 specific SchedWrites for use with InstRW
+def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // not pipelined
+}
+def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
+def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
+def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; }
+def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; }
+def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
+def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
+def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
+
+def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
+def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
+
+def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
+def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 4;
+}
+def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
+def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 5;
+}
+def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
+def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 6;
+}
+def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
+def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // as it is internally two insns (MUL then ADD)
+}
+def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
+def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
+
+def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+}
+def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific - map operands to SchedReadWrites
+
+def : InstRW<[WriteALU], (instrs COPY)>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
+ (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
+ "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
+
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
+ (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
+ "t2MOVi", "t2MOV_ga_dyn")>;
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
+ (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
+def : InstRW<[R52WriteLd,R52Read_ISS],
+ (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
+
+def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
+ (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
+ "(t|t2)UBFX", "(t|t2)SBFX")>;
+
+// Saturating arithmetic
+def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
+ (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
+ "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
+ "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
+ "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
+ "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
+
+// Parallel arithmetic
+def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
+ (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
+ "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
+ "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
+ "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
+
+// Halving parallel arithmetic and sign/zero-extend with add.
+def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
+ (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
+ "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
+ "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
+ "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
+ "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
+ "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
+
+// Sum of Absolute Difference
+def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
+ (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
+
+// Integer Multiply
+def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
+ (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+ "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+ "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
+ "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
+
+// Multiply Accumulate
+// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
+// The store pipeline is used partly for 64-bit operations.
+def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
+ (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+ "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
+ "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
+ "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
+ "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
+ "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
+ "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
+ "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
+      "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
+ "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
+      "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
+ "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
+ "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
+
+def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
+ (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+
+// Loads (except POST) with SHL > 2, or ROR, require 2 extra cycles.
+// However, that's non-trivial to specify, so we keep the model uniform.
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
+ (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
+ "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
+ "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
+ "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
+ "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
+ "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
+ (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
+ "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
+ "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
+ "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
+ "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
+ "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
+def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
+ "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
+ "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
+ "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
+ "ANDS?rr", "BICS?rr", "CRC*", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
+ "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
+ "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
+  "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
+ (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
+    "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
+
+def : InstRW<[R52WriteALU_EX1],
+ (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
+
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
+ (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
+
+def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
+ (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
+
+def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
+
+def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
+def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
+
+//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
+//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
+//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
+
+//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
+//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
+//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
+
+
+// Integer Load, Multiple.
+foreach Lat = 3-25 in {
+ def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
+ let Latency = Lat;
+ }
+ def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
+}
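+// The ...CyNo flavours carry the same latency but consume no micro-ops or
+// resources; they pad the variadic fallback where the register count is
+// unknown.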
+foreach NAddr = 1-16 in {
+ def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
+}
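+// R52WriteILDM picks a variant by the LDM's register count (via
+// getNumLDMAddresses): the first loaded register is available after 4 cycles
+// and each subsequent register one cycle later.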
+def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
+def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
+def R52WriteILDM : SchedWriteVariant<[
+ SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
+
+ SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy]>,
+ SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy]>,
+
+ SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy]>,
+ SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy]>,
+
+ SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy]>,
+ SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy]>,
+
+ SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy]>,
+ SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy]>,
+
+ SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy]>,
+ SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy]>,
+
+ SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy]>,
+ SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy]>,
+
+ SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy,
+ R52WriteILDM18Cy]>,
+  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy,
+ R52WriteILDM18Cy, R52WriteILDM19Cy]>,
+
+// Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6CyNo, R52WriteILDM7CyNo,
+ R52WriteILDM8CyNo, R52WriteILDM9CyNo,
+ R52WriteILDM10CyNo, R52WriteILDM11CyNo,
+ R52WriteILDM12CyNo, R52WriteILDM13CyNo,
+ R52WriteILDM14CyNo, R52WriteILDM15CyNo,
+ R52WriteILDM16CyNo, R52WriteILDM17CyNo,
+ R52WriteILDM18Cy, R52WriteILDM19Cy]>
+]> { let Variadic=1; }
+
+// Integer Store, Multiple
+def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+foreach NumAddr = 1-16 in {
+ def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
+}
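+// R52WriteISTM<N> is N back-to-back copies of the address-increment write, so
+// micro-ops and modeled latency grow with the number of stored registers.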
+def R52WriteISTM : SchedWriteVariant<[
+ SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
+ SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
+ SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
+ SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
+ SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
+ SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
+ SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
+ SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
+ SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
+ SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
+ SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
+ SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
+ SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
+ SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
+ SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
+  // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [R52WriteISTM2]>
+]>;
+
+def : InstRW<[R52WriteILDM, R52Read_ISS],
+ (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
+ "(t|sys)LDM(IA|DA|DB|IB)$")>;
+def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
+ (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
+def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
+ (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+
+// Integer Store, Single Element
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
+ (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
+ "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
+ "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
+
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
+ "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
+ "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
+ "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
+
+// Integer Store, Dual
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
+ (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
+
+def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
+ (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
+def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
+ "PUSH", "tPUSH")>;
+
+// LDRLIT pseudo-instructions; they expand to LDR + PICADD
+def : InstRW<[R52WriteLd],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
+
+
+
+//===----------------------------------------------------------------------===//
+// VFP, Floating Point Support
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
+
+def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
+def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
+
+def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
+ (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
+
+def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
+def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
+
+
+//===----------------------------------------------------------------------===//
+// Neon Support
+
+// Vector load/store multiple
+foreach NumAddr = 1-16 in {
+ def R52LMAddrPred#NumAddr :
+ SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
+}
+foreach Lat = 1-32 in {
+ def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
+ let Latency = Lat;
+ }
+}
+foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
+ def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+}
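+// R52WriteVLDM: the first D register is available 5 cycles after issue and
+// each additional register one cycle later; the reserved load/store unit
+// blocks dual-issue for the duration.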
+def R52WriteVLDM : SchedWriteVariant<[
+ // 1 D reg
+ SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
+ R52ReserveLd5Cy]>,
+ SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
+ R52ReserveLd5Cy]>,
+
+ // 2 D reg
+ SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52ReserveLd6Cy]>,
+ SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52ReserveLd6Cy]>,
+
+ // 3 D reg
+ SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy,
+                             R52ReserveLd7Cy]>,
+ SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy,
+ R52ReserveLd7Cy]>,
+
+ // 4 D reg
+ SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52ReserveLd8Cy]>,
+ SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52ReserveLd8Cy]>,
+
+ // 5 D reg
+ SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy,
+ R52ReserveLd9Cy]>,
+ SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy,
+ R52ReserveLd9Cy]>,
+
+ // 6 D reg
+ SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52ReserveLd10Cy]>,
+ SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52ReserveLd10Cy]>,
+
+ // 7 D reg
+ SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy,
+ R52ReserveLd11Cy]>,
+ SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy,
+ R52ReserveLd11Cy]>,
+
+ // 8 D reg
+  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd12Cy]>,
+  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd12Cy]>,
+ // unknown number of reg.
+ SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd5Cy]>
+]> { let Variadic=1;}
+
+// Variable-count stores; these cannot dual-issue.
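+// Each R52WriteSTM<N> below: Latency = N, NumMicroOps = 2 * (N - 4), and the
+// load/store unit is reserved for N - 4 cycles.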
+def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1];
+}
+def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2];
+}
+def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [3];
+}
+def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+ let ResourceCycles = [4];
+}
+def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+ let ResourceCycles = [5];
+}
+def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 10;
+ let NumMicroOps = 12;
+ let ResourceCycles = [6];
+}
+def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 11;
+ let NumMicroOps = 14;
+ let ResourceCycles = [7];
+}
+def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 12;
+ let NumMicroOps = 16;
+ let ResourceCycles = [8];
+}
+def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 13;
+ let NumMicroOps = 18;
+ let ResourceCycles = [9];
+}
+def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 14;
+ let NumMicroOps = 20;
+ let ResourceCycles = [10];
+}
+def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 15;
+ let NumMicroOps = 22;
+ let ResourceCycles = [11];
+}
+
+def R52WriteSTM : SchedWriteVariant<[
+ SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
+ SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
+ SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
+ SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
+ SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
+ SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
+ SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
+ SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
+ SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
+ SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
+ SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
+ SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
+ SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
+ SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
+ SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
+ SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
+ // unknown number of registers, just use resources for two
+ SchedVar<NoSchedPred, [R52WriteSTM6]>
+]>;
+
+// Vector loads/stores. They can issue only in slot 0, and can dual-issue with
+// another instruction in slot 1, but only during their last issue cycle.
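+// Pattern: an n-beat access has Latency = 4 + n, 2n - 1 micro-ops, and
+// reserves the unit for n cycles.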
+def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
+def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2];
+}
+def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3];
+}
+def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+ let ResourceCycles = [4];
+}
+def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2];
+}
+def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3];
+}
+def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+ let ResourceCycles = [4];
+}
+def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 9;
+ let NumMicroOps = 9;
+ let ResourceCycles = [5];
+}
+
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
+ (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
+ (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
+
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
+ (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
+
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
+ (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
+ (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
+
+def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
+def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
+def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
+def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
+ (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
+
+//---
+// VLDx. Vector Loads
+//---
+// 1-element structure load
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
+
+// 2-element structure load
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
+
+// 3-element structure load
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+
+// 4-element structure load
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+
+//---
+// VSTx. Vector Stores
+//---
+// 1-element structure store
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
+def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
+def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
+def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
+
+// 2-element structure store
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
+
+// 3-element structure store
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
+
+// 4-element structure store
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
+
+} // R52 SchedModel
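Note on the scheduling entries above: instregex patterns are regular expressions over TableGen opcode names, anchored at the start, so the trailing $ is what keeps the one-register VST1 pattern from also swallowing the write-back (wb) and multi-register (T/Q) variants, which get their own lines. A minimal standalone C++ sketch of that matching behavior, using a hand-picked sample of opcode names rather than the real TableGen list:

    #include <iostream>
    #include <regex>
    #include <string>
    #include <vector>

    int main() {
      // Hand-picked opcode names for illustration only.
      std::vector<std::string> Names = {"VST1d8", "VST1d8wb_fixed", "VST1d8T",
                                        "VST1d8Q", "VST1d64TPseudo"};
      // Mirrors instregex "VST1d(8|16|32|64)$": anchored at both ends.
      std::regex OneRegStore("^VST1d(8|16|32|64)$");
      for (const std::string &N : Names)
        if (std::regex_search(N, OneRegStore))
          std::cout << N << " -> R52WriteVST1Mem\n"; // only "VST1d8" matches
      return 0;
    }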
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 1d7eef9ddcfd..e2df0bddd0d1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -58,8 +59,7 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
clEnumValN(RestrictedIT, "arm-restrict-it",
"Disallow deprecated IT based on ARMv8"),
clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
- "Allow IT blocks based on ARMv7"),
- clEnumValEnd));
+ "Allow IT blocks based on ARMv7")));
/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
/// currently supported (for testing only).
@@ -76,6 +76,11 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+/// EnableExecuteOnly - Enables the generation of execute-only code on supported
+/// targets.
+static cl::opt<bool>
+EnableExecuteOnly("arm-execute-only");
+
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
StringRef FS) {
ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
@@ -89,8 +94,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
- TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+ GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -98,7 +104,32 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this) {}
+ TLInfo(TM, *this), GISel() {}
+
+const CallLowering *ARMSubtarget::getCallLowering() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
+}
+
+const InstructionSelector *ARMSubtarget::getInstructionSelector() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+}
+
+const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+}
+
+const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+}
+
+bool ARMSubtarget::isXRaySupported() const {
+ // We don't currently support Thumb, but Windows requires Thumb.
+ return hasV6Ops() && hasARMOps() && !isTargetWindows();
+}
void ARMSubtarget::initializeEnvironment() {
// MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
@@ -117,10 +148,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isTargetDarwin()) {
StringRef ArchName = TargetTriple.getArchName();
- if (ArchName.endswith("v7s"))
+ unsigned ArchKind = llvm::ARM::parseArch(ArchName);
+ if (ArchKind == llvm::ARM::AK_ARMV7S)
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
- else if (ArchName.endswith("v7k"))
+ else if (ArchKind == llvm::ARM::AK_ARMV7K)
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
// ARMv7k does not use SjLj exception handling.
CPUString = "cortex-a7";
@@ -143,6 +175,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Assert this for now to make the change obvious.
assert(hasV6T2Ops() || !hasThumb2());
+ // Execute only support requires movt support
+ if (genExecuteOnly())
+ assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target");
+
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -199,6 +235,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
(Options.UnsafeFPMath || isTargetDarwin()))
UseNEONForSinglePrecisionFP = true;
+ if (isRWPI())
+ ReserveR9 = true;
+
// FIXME: Teach TableGen to deal with these instead of doing it manually here.
switch (ARMProcFamily) {
case Others:
@@ -234,6 +273,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexR7:
case CortexM3:
case ExynosM1:
+ case CortexR52:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
@@ -261,6 +301,15 @@ bool ARMSubtarget::isAAPCS16_ABI() const {
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+bool ARMSubtarget::isROPI() const {
+ return TM.getRelocationModel() == Reloc::ROPI ||
+ TM.getRelocationModel() == Reloc::ROPI_RWPI;
+}
+bool ARMSubtarget::isRWPI() const {
+ return TM.getRelocationModel() == Reloc::RWPI ||
+ TM.getRelocationModel() == Reloc::ROPI_RWPI;
+}
+
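For context: ROPI (read-only position independence) makes code and read-only data position independent, while RWPI addresses writable data relative to a static base register (r9), which is why initSubtargetFeatures above also sets ReserveR9 under RWPI. ROPI_RWPI enables both, so each predicate accepts two enumerators. A standalone mirror of the logic, with a local enum standing in for llvm::Reloc::Model:

    #include <cassert>

    // Local stand-in for llvm::Reloc::Model, for illustration only.
    enum class RelocModel { Static, PIC, ROPI, RWPI, ROPI_RWPI };

    bool isROPI(RelocModel RM) {
      return RM == RelocModel::ROPI || RM == RelocModel::ROPI_RWPI;
    }
    bool isRWPI(RelocModel RM) {
      return RM == RelocModel::RWPI || RM == RelocModel::ROPI_RWPI;
    }

    int main() {
      assert(isROPI(RelocModel::ROPI_RWPI) && isRWPI(RelocModel::ROPI_RWPI));
      assert(isROPI(RelocModel::ROPI) && !isRWPI(RelocModel::ROPI));
      return 0;
    }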
bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return true;
@@ -268,7 +317,7 @@ bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
// 32 bit macho has no relocation for a-b if a is undefined, even if b is in
// the section that is being relocated. This means we have to use a load even
// for GVs that are known to be local to the dso.
- if (isTargetDarwin() && TM.isPositionIndependent() &&
+ if (isTargetMachO() && TM.isPositionIndependent() &&
(GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
return true;
@@ -300,9 +349,7 @@ bool ARMSubtarget::enablePostRAScheduler() const {
return (!isThumb() || hasThumb2());
}
-bool ARMSubtarget::enableAtomicExpand() const {
- return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps());
-}
+bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
@@ -316,7 +363,7 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return !NoMovt && hasV8MBaselineOps() &&
- (isTargetWindows() || !MF.getFunction()->optForMinSize());
+ (isTargetWindows() || !MF.getFunction()->optForMinSize() || genExecuteOnly());
}
bool ARMSubtarget::useFastISel() const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 910de0e1e72d..8c8218d0f432 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -25,6 +25,7 @@
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -43,7 +44,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexM3,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3,
CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
Krait, Swift, ExynosM1
};
@@ -53,7 +54,8 @@ protected:
enum ARMArchEnum {
ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline,
+ ARMv8r
};
public:
@@ -234,6 +236,9 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
+ /// HasFPAO - if true, the processor computes positive address offsets faster
+ bool HasFPAO = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -296,6 +301,9 @@ protected:
/// Generate calls via indirect call instructions.
bool GenLongCalls = false;
+ /// Generate code that does not contain data access to code sections.
+ bool GenExecuteOnly = false;
+
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath = false;
@@ -346,6 +354,9 @@ public:
ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle);
+ /// This object will take ownership of \p GISelAccessor.
+ void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); }
+
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
@@ -375,6 +386,11 @@ public:
return &InstrInfo->getRegisterInfo();
}
+ const CallLowering *getCallLowering() const override;
+ const InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
+
private:
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
@@ -383,6 +399,11 @@ private:
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
+ /// Access point for the GlobalISel-related APIs.
+ /// This is used to avoid ifndefs spreading around while GISel is
+ /// an optional library.
+ std::unique_ptr<GISelAccessor> GISel;
+
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
@@ -452,6 +473,7 @@ public:
bool hasTrustZone() const { return HasTrustZone; }
bool has8MSecExt() const { return Has8MSecExt; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasFPAO() const { return HasFPAO; }
bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
bool hasSlowVDUP32() const { return HasSlowVDUP32; }
@@ -475,6 +497,7 @@ public:
bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
bool genLongCalls() const { return GenLongCalls; }
+ bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
@@ -540,10 +563,15 @@ public:
}
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+ virtual bool isXRaySupported() const override;
+
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
bool isAAPCS16_ABI() const;
+ bool isROPI() const;
+ bool isRWPI() const;
+
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
@@ -557,11 +585,17 @@ public:
return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
+ bool useR7AsFramePointer() const {
+ return isTargetDarwin() || (!isTargetWindows() && isThumb());
+ }
/// Returns true if the frame setup is split into two separate pushes (first
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
- /// to lr.
- bool splitFramePushPop() const {
- return isTargetMachO();
+ /// to lr. This is always required on Thumb1-only targets, as the push and
+ /// pop instructions can't access the high registers.
+ bool splitFramePushPop(const MachineFunction &MF) const {
+ return (useR7AsFramePointer() &&
+ MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+ isThumb1Only();
}
bool useStride4VFPs(const MachineFunction &MF) const;
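With the change above, splitFramePushPop returns true only when r7 is actually used as the frame pointer (or on Thumb1-only targets, where push/pop cannot reach the high registers anyway). A standalone sketch, not LLVM code, of the register partition the split prologue implies, with 14 standing for lr:

    #include <utility>
    #include <vector>

    // First push r0-r7 and lr, then r8-r11, so the frame pointer (r7) ends up
    // adjacent to the saved lr on the stack.
    std::pair<std::vector<int>, std::vector<int>>
    splitPushRegs(const std::vector<int> &CSRegs) {
      std::vector<int> Low, High;
      for (int R : CSRegs)
        (R <= 7 || R == 14 ? Low : High).push_back(R);
      return {Low, High}; // emit: push {Low...}; push {High...}
    }

    int main() {
      auto Split = splitPushRegs({4, 5, 6, 7, 8, 9, 10, 11, 14});
      return Split.first.size() == 5 && Split.second.size() == 4 ? 0 : 1;
    }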
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index dc730a675bef..70c9567d99f8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -10,11 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetMachine.h"
#include "ARM.h"
+#include "ARMCallLowering.h"
#include "ARMFrameLowering.h"
-#include "ARMTargetMachine.h"
+#include "ARMInstructionSelector.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
@@ -22,6 +30,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
@@ -50,12 +59,13 @@ EnableGlobalMerge("arm-global-merge", cl::Hidden,
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
- RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
- RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget);
- RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget);
- RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
+ RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
+ RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
+ RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget());
+ RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget());
PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeGlobalISel(Registry);
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
}
@@ -84,11 +94,13 @@ computeTargetABI(const Triple &TT, StringRef CPU,
ARMBaseTargetMachine::ARMABI TargetABI =
ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
+ unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
+ StringRef ArchName = llvm::ARM::getArchName(ArchKind);
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
if (TT.getEnvironment() == llvm::Triple::EABI ||
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- CPU.startswith("cortex-m")) {
+ llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else if (TT.isWatchABI()) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
@@ -184,6 +196,10 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
// Default relocation model on Darwin is PIC.
return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
+ if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI)
+ assert(TT.isOSBinFormatELF() &&
+ "ROPI/RWPI currently only supported for ELF");
+
// DynamicNoPIC is only used on darwin.
if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
return Reloc::Static;
@@ -224,6 +240,29 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+struct ARMGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+} // End anonymous namespace.
+#endif
+
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -255,6 +294,24 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new ARMLegalizerInfo());
+
+ auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
+#endif
+ I->setGISelAccessor(*GISel);
}
return I.get();
}
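The block above wires GlobalISel into the subtarget through the GISelAccessor pattern: the accessor is always installed, but only the LLVM_BUILD_GLOBAL_ISEL build installs one whose getters return real objects, so callers never need #ifdefs. A minimal standalone sketch of the idea (the names here are illustrative, not the LLVM classes):

    #include <memory>

    struct CallLoweringStub {}; // stands in for llvm::CallLowering

    // Base accessor: every getter returns null; used when GISel is not built.
    struct AccessorSketch {
      virtual ~AccessorSketch() = default;
      virtual const CallLoweringStub *getCallLowering() const { return nullptr; }
    };

    // "Actual" accessor: owns the real objects and hands out pointers to them.
    struct ActualAccessorSketch : AccessorSketch {
      std::unique_ptr<CallLoweringStub> CL = std::make_unique<CallLoweringStub>();
      const CallLoweringStub *getCallLowering() const override { return CL.get(); }
    };

    int main() {
      std::unique_ptr<AccessorSketch> A(new ActualAccessorSketch());
      return A->getCallLowering() != nullptr ? 0 : 1;
    }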
@@ -346,6 +403,12 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+#endif
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -406,6 +469,28 @@ bool ARMPassConfig::addInstSelector() {
return false;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+bool ARMPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool ARMPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool ARMPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool ARMPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
+#endif
+
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
@@ -436,8 +521,8 @@ void ARMPassConfig::addPreSched2() {
return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
}));
- addPass(createIfConverter([this](const Function &F) {
- return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only();
+ addPass(createIfConverter([](const MachineFunction &MF) {
+ return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}));
}
addPass(createThumb2ITBlockPass());
@@ -447,8 +532,8 @@ void ARMPassConfig::addPreEmitPass() {
addPass(createThumb2SizeReductionPass());
// Constant island pass work on unbundled instructions.
- addPass(createUnpackMachineBundles([this](const Function &F) {
- return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2();
+ addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+ return MF.getSubtarget<ARMSubtarget>().isThumb2();
}));
// Don't optimize barriers at -O0.
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index eaed5cc68750..625c4280e1a6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -27,8 +27,10 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI ==
- ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM);
+ bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -38,19 +40,28 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
AttributesSection =
getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
+
+ // Make code section unreadable when in execute-only mode
+ if (genExecuteOnly) {
+ unsigned Type = ELF::SHT_PROGBITS;
+ unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
+ // Since we cannot modify flags for an existing section, we create a new
+ // section with the right flags, and use 0 as the unique ID for
+ // execute-only text
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM)
return TargetLoweringObjectFileELF::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
+ GV, Encoding, TM, MMI, Streamer);
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang),
+ return MCSymbolRefExpr::create(TM.getSymbol(GV),
MCSymbolRefExpr::VK_ARM_TARGET2, getContext());
}
@@ -59,3 +70,23 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
getContext());
}
+
+MCSection *
+ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO,
+ SectionKind SK, const TargetMachine &TM) const {
+ // Set execute-only access for the explicit section
+ if (genExecuteOnly && SK.isText())
+ SK = SectionKind::getExecuteOnly();
+
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
+}
+
+MCSection *
+ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
+ SectionKind SK, const TargetMachine &TM) const {
+ // Place the global in the execute-only text section
+ if (genExecuteOnly && SK.isText())
+ SK = SectionKind::getExecuteOnly();
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM);
+}
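For reference, the execute-only .text section created above differs from a normal text section only in its flag word: SHF_ARM_PURECODE marks it as containing no data the program will read, so it can be mapped execute-only. A standalone sketch of the flag combination (the constants mirror the ELF values):

    #include <cstdint>

    constexpr uint32_t SHF_ALLOC        = 0x2;        // occupies memory at run time
    constexpr uint32_t SHF_EXECINSTR    = 0x4;        // holds executable instructions
    constexpr uint32_t SHF_ARM_PURECODE = 0x20000000; // no data accesses to section

    constexpr uint32_t ExecuteOnlyText =
        SHF_EXECINSTR | SHF_ALLOC | SHF_ARM_PURECODE;

    static_assert(ExecuteOnlyText == 0x20000006,
                  "flag word used for the execute-only .text section above");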
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index b1db201cb30d..24e755ddac27 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -18,6 +18,7 @@ class MCContext;
class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+ mutable bool genExecuteOnly = false;
protected:
const MCSection *AttributesSection;
public:
@@ -28,14 +29,20 @@ public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 13c5dc61acd9..10e6297ef1ed 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -41,7 +41,7 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Thumb1.
if (SImmVal >= 0 && SImmVal < 256)
return 1;
- if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
// Load from constantpool.
return 3;
@@ -69,6 +69,25 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Idx == 1)
return 0;
+ if (Opcode == Instruction::And)
+ // Conversion to BIC is free, and means we can use ~Imm instead.
+ return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+
+ if (Opcode == Instruction::Add)
+ // Conversion to SUB is free, and means we can use -Imm instead.
+ return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
+
+ if (Opcode == Instruction::ICmp && Imm.isNegative() &&
+ Ty->getIntegerBitWidth() == 32) {
+ int64_t NegImm = -Imm.getSExtValue();
+ if (ST->isThumb2() && NegImm < 1<<12)
+ // icmp X, #-C -> cmn X, #C
+ return 0;
+ if (ST->isThumb() && NegImm < 1<<8)
+ // icmp X, #-C -> adds X, #C
+ return 0;
+ }
+
return getIntImmCost(Imm, Ty);
}
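The three new cases above fold an immediate's cost into that of a cheaper equivalent: AND with Imm can be emitted as BIC with ~Imm, ADD with Imm as SUB with -Imm, and a compare against a small negative constant as CMN (or a flag-setting ADDS on Thumb1). A standalone worked example of the identities being relied on:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678;
      // AND by 0xFFFFFF00 == BIC by 0xFF: the inverted immediate can be cheaper.
      assert((X & 0xFFFFFF00u) == (X & ~0xFFu));
      int32_t Y = 100;
      // ADD by -8 == SUB by 8: the negated immediate can be cheaper.
      assert(Y + (-8) == Y - 8);
      // icmp X, #-5 can use CMN X, #5: the flags of X - (-5) are those of X + 5.
      assert((Y > -5) == (Y + 5 > 0));
      return 0;
    }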
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a0ca9e648002..d83228afb0ab 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -45,13 +45,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- ARMTTIImpl(const ARMTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- ARMTTIImpl(ARMTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
@@ -128,6 +121,16 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+
+ bool shouldBuildLookupTablesForConstant(Constant *C) const {
+ // In the ROPI and RWPI relocation models we can't have pointers to global
+ // variables or functions in constant data, so don't convert switches to
+ // lookup tables if any of the values would need relocation.
+ if (ST->isROPI() || ST->isRWPI())
+ return !C->needsRelocation();
+
+ return true;
+ }
/// @}
};
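shouldBuildLookupTablesForConstant is the hook SimplifyCFG consults before turning a switch into a constant lookup table. A standalone sketch of the problem case it guards against (illustrative functions, not LLVM code):

    static int f0() { return 0; }
    static int f1() { return 1; }
    static int f2() { return 2; }

    int dispatch(unsigned I) {
      // The lookup-table form of this switch would be a read-only array of
      // function pointers -- constant data needing relocations, which the hook
      // above rejects under ROPI/RWPI:
      //   static int (*const Tbl[])() = {f0, f1, f2};
      switch (I) {
      case 0: return f0();
      case 1: return f1();
      default: return f2();
      }
    }

    int main() { return dispatch(0); }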
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f5de8a3cd25e..c243a2d35979 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
@@ -52,6 +53,20 @@ using namespace llvm;
namespace {
+enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly };
+
+static cl::opt<ImplicitItModeTy> ImplicitItMode(
+ "arm-implicit-it", cl::init(ImplicitItModeTy::ARMOnly),
+ cl::desc("Allow conditional instructions outdside of an IT block"),
+ cl::values(clEnumValN(ImplicitItModeTy::Always, "always",
+ "Accept in both ISAs, emit implicit ITs in Thumb"),
+ clEnumValN(ImplicitItModeTy::Never, "never",
+ "Warn in ARM, reject in Thumb"),
+ clEnumValN(ImplicitItModeTy::ARMOnly, "arm",
+ "Accept in ARM, reject in Thumb"),
+ clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
+ "Warn in ARM, emit implicit ITs in Thumb")));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -145,6 +160,16 @@ class ARMAsmParser : public MCTargetAsmParser {
bool NextSymbolIsThumb;
+ bool useImplicitITThumb() const {
+ return ImplicitItMode == ImplicitItModeTy::Always ||
+ ImplicitItMode == ImplicitItModeTy::ThumbOnly;
+ }
+
+ bool useImplicitITARM() const {
+ return ImplicitItMode == ImplicitItModeTy::Always ||
+ ImplicitItMode == ImplicitItModeTy::ARMOnly;
+ }
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -153,40 +178,174 @@ class ARMAsmParser : public MCTargetAsmParser {
// '0' inverse of condition (else).
// Count of instructions in IT block is
// 4 - trailingzeroes(mask)
-
- bool FirstCond; // Explicit flag for when we're parsing the
- // First instruction in the IT block. It's
- // implied in the mask, so needs special
- // handling.
+ // Note that this does not have the same encoding
+ // as in the IT instruction, which also depends
+ // on the low bit of the condition code.
unsigned CurPosition; // Current position in parsing of IT
- // block. In range [0,3]. Initialized
- // according to count of instructions in block.
- // ~0U if no active IT block.
+ // block. In range [0,4], with 0 being the IT
+ // instruction itself. Initialized according to
+ // count of instructions in block. ~0U if no
+ // active IT block.
+
+ bool IsExplicit; // true - The IT instruction was present in the
+ // input, we should not modify it.
+ // false - The IT instruction was added
+ // implicitly, we can extend it if that
+ // would be legal.
} ITState;
+
+ llvm::SmallVector<MCInst, 4> PendingConditionalInsts;
+
+ void flushPendingInstructions(MCStreamer &Out) override {
+ if (!inImplicitITBlock()) {
+ assert(PendingConditionalInsts.size() == 0);
+ return;
+ }
+
+ // Emit the IT instruction
+ unsigned Mask = getITMaskEncoding();
+ MCInst ITInst;
+ ITInst.setOpcode(ARM::t2IT);
+ ITInst.addOperand(MCOperand::createImm(ITState.Cond));
+ ITInst.addOperand(MCOperand::createImm(Mask));
+ Out.EmitInstruction(ITInst, getSTI());
+
+ // Emit the conditional instructions
+ assert(PendingConditionalInsts.size() <= 4);
+ for (const MCInst &Inst : PendingConditionalInsts) {
+ Out.EmitInstruction(Inst, getSTI());
+ }
+ PendingConditionalInsts.clear();
+
+ // Clear the IT state
+ ITState.Mask = 0;
+ ITState.CurPosition = ~0U;
+ }
+
bool inITBlock() { return ITState.CurPosition != ~0U; }
+ bool inExplicitITBlock() { return inITBlock() && ITState.IsExplicit; }
+ bool inImplicitITBlock() { return inITBlock() && !ITState.IsExplicit; }
bool lastInITBlock() {
return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
}
void forwardITPosition() {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
- // mark the block as done.
+ // mark the block as done, except for implicit IT blocks, which we leave
+ // open until we find an instruction that can't be added to it.
unsigned TZ = countTrailingZeros(ITState.Mask);
- if (++ITState.CurPosition == 5 - TZ)
+ if (++ITState.CurPosition == 5 - TZ && ITState.IsExplicit)
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
- void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) {
- return getParser().Note(L, Msg, Ranges);
+ // Rewind the state of the current IT block, removing the last slot from it.
+ void rewindImplicitITPosition() {
+ assert(inImplicitITBlock());
+ assert(ITState.CurPosition > 1);
+ ITState.CurPosition--;
+ unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned NewMask = 0;
+ NewMask |= ITState.Mask & (0xC << TZ);
+ NewMask |= 0x2 << TZ;
+ ITState.Mask = NewMask;
+ }
+
+ // Discard the current implicit IT block. This is only valid while still at
+ // the first slot (no instructions emitted into it yet), and closes the block.
+ void discardImplicitITBlock() {
+ assert(inImplicitITBlock());
+ assert(ITState.CurPosition == 1);
+ ITState.CurPosition = ~0U;
+ return;
}
- bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) {
- return getParser().Warning(L, Msg, Ranges);
+
+ // Get the encoding of the IT mask, as it will appear in an IT instruction.
+ unsigned getITMaskEncoding() {
+ assert(inITBlock());
+ unsigned Mask = ITState.Mask;
+ unsigned TZ = countTrailingZeros(Mask);
+ if ((ITState.Cond & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ Mask ^= (0xE << TZ) & 0xF;
+ }
+ return Mask;
}
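getITMaskEncoding bridges the two mask conventions: the parser records a 't' slot as 1 regardless of the condition, while the instruction encoding flips the bits above the stop bit whenever the condition's low bit is 0. A standalone worked example, using EQ = 0 and NE = 1 as in ARMCC:

    #include <cassert>

    // Mirror of the conversion above; parsed masks use 1 == 't'.
    unsigned encodeITMask(unsigned Mask, unsigned Cond) {
      unsigned TZ = 0;
      while (((Mask >> TZ) & 1) == 0) ++TZ; // countTrailingZeros(Mask)
      if ((Cond & 1) == 0)
        Mask ^= (0xE << TZ) & 0xF;          // flip the bits above the stop bit
      return Mask;
    }

    int main() {
      // ITE parses to mask 0b0100: slot 2 is 'e' (bit 3 clear), stop bit at 2.
      assert(encodeITMask(0b0100, /*NE=*/1) == 0b0100); // odd cond: unchanged
      assert(encodeITMask(0b0100, /*EQ=*/0) == 0b1100); // even cond: flipped
      return 0;
    }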
- bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) {
- return getParser().Error(L, Msg, Ranges);
+
+ // Get the condition code corresponding to the current IT block slot.
+ ARMCC::CondCodes currentITCond() {
+ unsigned MaskBit;
+ if (ITState.CurPosition == 1)
+ MaskBit = 1;
+ else
+ MaskBit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+
+ return MaskBit ? ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond);
+ }
+
+ // Invert the condition of the current IT block slot without changing any
+ // other slots in the same block.
+ void invertCurrentITCondition() {
+ if (ITState.CurPosition == 1) {
+ ITState.Cond = ARMCC::getOppositeCondition(ITState.Cond);
+ } else {
+ ITState.Mask ^= 1 << (5 - ITState.CurPosition);
+ }
+ }
+
+ // Returns true if the current IT block is full (all 4 slots used).
+ bool isITBlockFull() {
+ return inITBlock() && (ITState.Mask & 1);
+ }
+
+ // Extend the current implicit IT block to have one more slot with the given
+ // condition code.
+ void extendImplicitITBlock(ARMCC::CondCodes Cond) {
+ assert(inImplicitITBlock());
+ assert(!isITBlockFull());
+ assert(Cond == ITState.Cond ||
+ Cond == ARMCC::getOppositeCondition(ITState.Cond));
+ unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned NewMask = 0;
+ // Keep any existing condition bits.
+ NewMask |= ITState.Mask & (0xE << TZ);
+ // Insert the new condition bit.
+ NewMask |= (Cond == ITState.Cond) << TZ;
+ // Move the trailing 1 down one bit.
+ NewMask |= 1 << (TZ - 1);
+ ITState.Mask = NewMask;
+ }
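extendImplicitITBlock grows the block by one slot: the stop bit moves down one position, and the vacated bit records whether the new slot uses the block condition (1) or its inverse (0). A standalone worked example, starting from the fresh one-slot mask 0b1000 that startImplicitITBlock below sets up:

    #include <cassert>

    // Mirror of the mask update in extendImplicitITBlock.
    unsigned extendMask(unsigned Mask, bool SameCond) {
      unsigned TZ = 0;
      while (((Mask >> TZ) & 1) == 0) ++TZ;  // find the stop bit
      unsigned NewMask = Mask & (0xE << TZ); // keep existing condition bits
      NewMask |= (SameCond ? 1u : 0u) << TZ; // new slot's condition bit
      NewMask |= 1u << (TZ - 1);             // stop bit moves down one slot
      return NewMask;
    }

    int main() {
      assert(extendMask(0b1000, true) == 0b1100);  // IT -> ITT
      assert(extendMask(0b1000, false) == 0b0100); // IT -> ITE
      return 0;
    }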
+
+ // Create a new implicit IT block with a dummy condition code.
+ void startImplicitITBlock() {
+ assert(!inITBlock());
+ ITState.Cond = ARMCC::AL;
+ ITState.Mask = 8;
+ ITState.CurPosition = 1;
+ ITState.IsExplicit = false;
+ return;
+ }
+
+ // Create a new explicit IT block with the given condition and mask. The mask
+ // should be in the parsed format, with a 1 implying 't', regardless of the
+ // low bit of the condition.
+ void startExplicitITBlock(ARMCC::CondCodes Cond, unsigned Mask) {
+ assert(!inITBlock());
+ ITState.Cond = Cond;
+ ITState.Mask = Mask;
+ ITState.CurPosition = 0;
+ ITState.IsExplicit = true;
+ return;
+ }
+
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Note(L, Msg, Range);
+ }
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Warning(L, Msg, Range);
+ }
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Error(L, Msg, Range);
}
bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,
@@ -355,6 +514,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool isITBlockTerminator(MCInst &Inst) const;
public:
enum ARMMatchResultTy {
@@ -363,6 +523,7 @@ public:
Match_RequiresV6,
Match_RequiresThumb2,
Match_RequiresV8,
+ Match_RequiresFlagSetting,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "ARMGenAsmMatcher.inc"
@@ -399,6 +560,9 @@ public:
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
+ unsigned MatchInstruction(OperandVector &Operands, MCInst &Inst,
+ uint64_t &ErrorInfo, bool MatchingInlineAsm,
+ bool &EmitInITBlock, MCStreamer &Out);
void onLabelParsed(MCSymbol *Symbol) override;
};
} // end anonymous namespace
@@ -3286,7 +3450,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseITCondCode(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3324,7 +3488,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) {
/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3347,7 +3511,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3366,7 +3530,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
/// coproc_option : '{' imm0_255 '}'
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3447,8 +3611,8 @@ static unsigned getDRegFromQReg(unsigned QReg) {
/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- assert(Parser.getTok().is(AsmToken::LCurly) &&
- "Token is not a Left Curly Brace");
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return TokError("Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat '{' token.
SMLoc RegLoc = Parser.getTok().getLoc();
@@ -3576,7 +3740,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
}
// Helper function to parse the lane index for vector lists.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+OperandMatchResultTy ARMAsmParser::
parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
Index = 0; // Always return a defined index value.
@@ -3628,7 +3792,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
}
// parse a vector register list
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseVectorList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
VectorLaneTy LaneKind;
@@ -3880,7 +4044,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
}
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3952,7 +4116,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
}
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4004,7 +4168,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4039,7 +4203,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
}
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4192,7 +4356,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
/// use in the MRS/MSR instructions added to support virtualization.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4247,7 +4411,7 @@ ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
int High) {
MCAsmParser &Parser = getParser();
@@ -4296,7 +4460,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4326,7 +4490,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
/// lsl #n 'n' in [0,31]
/// asr #n 'n' in [1,32]
/// n == 32 encoded as n == 0.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseShifterImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4397,7 +4561,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) {
/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
/// of instructions. Legal values are:
/// ror #n 'n' in {0, 8, 16, 24}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseRotImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4444,7 +4608,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseModImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
@@ -4561,7 +4725,7 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
}
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseBitfield(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4630,7 +4794,7 @@ ARMAsmParser::parseBitfield(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// postidx_reg := '+' register {, shift}
@@ -4680,7 +4844,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// am3offset := '+' register
@@ -4833,8 +4997,8 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
bool ARMAsmParser::parseMemory(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S, E;
- assert(Parser.getTok().is(AsmToken::LBrac) &&
- "Token is not a Left Bracket");
+ if (Parser.getTok().isNot(AsmToken::LBrac))
+ return TokError("Token is not a Left Bracket");
S = Parser.getTok().getLoc();
Parser.Lex(); // Eat left bracket token.
@@ -5082,7 +5246,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
}
/// parseFPImm - A floating point immediate expression operand.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseFPImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
// Anything that can accept a floating point constant as an operand
@@ -5131,7 +5295,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc Loc = Tok.getLoc();
if (Tok.is(AsmToken::Real) && isVmovf) {
- APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
+ APFloat RealVal(APFloat::IEEEsingle(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
IntVal ^= (uint64_t)isNegative << 31;
@@ -5259,7 +5423,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return false;
}
// w/ a ':' after the '#', it's just like a plain ':'.
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
}
case AsmToken::Colon: {
S = Parser.getTok().getLoc();
@@ -5289,6 +5453,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (getParser().parseExpression(SubExprVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ // execute-only: we assume that assembly programmers know what they are
+ // doing and allow literal pool creation here
Operands.push_back(ARMOperand::CreateConstantPoolImm(SubExprVal, S, E));
return false;
}
@@ -5842,7 +6009,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "conditional execution not supported in Thumb1");
}
@@ -5856,14 +6022,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Mnemonic == "it") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
if (ITMask.size() > 3) {
- Parser.eatToEndOfStatement();
return Error(Loc, "too many conditions on IT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
char pos = ITMask[i - 1];
if (pos != 't' && pos != 'e') {
- Parser.eatToEndOfStatement();
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
@@ -5889,14 +6053,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
// If we had a predication code on an instruction that can't do that, issue an
// error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' is not predicable, but condition code specified");
}
@@ -5940,7 +6102,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// For ARM mode, generate an error if the .n qualifier is used.
if (ExtraToken == ".n" && !isThumb()) {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
- Parser.eatToEndOfStatement();
return Error(Loc, "instruction with .n (narrow) qualifier not allowed in "
"arm mode");
}
@@ -5958,28 +6119,19 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.eatToEndOfStatement();
return true;
}
- while (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
-
+ while (parseOptionalToken(AsmToken::Comma)) {
// Parse and remember the operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.eatToEndOfStatement();
return true;
}
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
- return Error(Loc, "unexpected token in argument list");
- }
-
- Parser.Lex(); // Consume the EndOfStatement
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
if (RequireVFPRegisterListCheck) {
ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
@@ -6043,10 +6195,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Rt2 must be Rt + 1 and Rt must be even.
if (Rt + 1 != Rt2 || (Rt & 1)) {
- Error(Op2.getStartLoc(), isLoad
- ? "destination operands must be sequential"
- : "source operands must be sequential");
- return true;
+ return Error(Op2.getStartLoc(),
+ isLoad ? "destination operands must be sequential"
+ : "source operands must be sequential");
}
unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
&(MRI->getRegClass(ARM::GPRPairRegClassID)));
@@ -6188,18 +6339,11 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
// NOTE: BKPT and HLT instructions have the interesting property of being
// allowed in IT blocks, but not being predicable. They just always execute.
if (inITBlock() && !instIsBreakpoint(Inst)) {
- unsigned Bit = 1;
- if (ITState.FirstCond)
- ITState.FirstCond = false;
- else
- Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
// The instruction must be predicable.
if (!MCID.isPredicable())
return Error(Loc, "instructions in IT block must be predicable");
unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
- unsigned ITCond = Bit ? ITState.Cond :
- ARMCC::getOppositeCondition(ITState.Cond);
- if (Cond != ITCond) {
+ if (Cond != currentITCond()) {
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
for (unsigned I = 1; I < Operands.size(); ++I)
@@ -6208,14 +6352,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
"', but expected '" +
- ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ ARMCondCodeToString(ARMCC::CondCodes(currentITCond())) + "'");
}
// Check for non-'al' condition codes outside of the IT block.
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
- Inst.getOpcode() != ARM::t2Bcc)
+ Inst.getOpcode() != ARM::t2Bcc) {
return Error(Loc, "predicated instructions must be in IT block");
+ } else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL) {
+ return Warning(Loc, "predicated instructions should be in IT block");
+ }
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
@@ -6520,6 +6669,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
+ case ARM::tCBZ:
+ case ARM::tCBNZ: {
+ if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<6, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ }
case ARM::MOVi16:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
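The new tCBZ/tCBNZ case above validates the branch target with isUnsignedOffset<6, 1>: an unsigned 6-bit field scaled by 2, i.e. an even byte offset from 0 to 126. A standalone sketch of that predicate for a known-constant target (the helper name is hypothetical):

    #include <cassert>
    #include <cstdint>

    // Equivalent of isUnsignedOffset<6, 1> on a constant value.
    bool isCbzTargetInRange(int64_t V) {
      return V >= 0 && V <= 126 && (V % 2) == 0; // 6 bits, scaled by 2
    }

    int main() {
      assert(isCbzTargetInRange(126));
      assert(!isCbzTargetInRange(128) && !isCbzTargetInRange(63));
      return 0;
    }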
@@ -8639,27 +8794,15 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
case ARM::ITasm:
case ARM::t2IT: {
- // The mask bits for all but the first condition are represented as
- // the low bit of the condition code value implies 't'. We currently
- // always have 1 implies 't', so XOR toggle the bits if the low bit
- // of the condition code is zero.
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
- unsigned OrigMask = Mask;
- unsigned TZ = countTrailingZeros(Mask);
- if ((Inst.getOperand(0).getImm() & 1) == 0) {
- assert(Mask && TZ <= 3 && "illegal IT mask value!");
- Mask ^= (0xE << TZ) & 0xF;
- }
- MO.setImm(Mask);
+ ARMCC::CondCodes Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
// Set up the IT block state according to the IT instruction we just
// matched.
assert(!inITBlock() && "nested IT blocks?!");
- ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
- ITState.Mask = OrigMask; // Use the original mask, not the updated one.
- ITState.CurPosition = 0;
- ITState.FirstCond = true;
+ startExplicitITBlock(Cond, Mask);
+ MO.setImm(getITMaskEncoding());
break;
}
case ARM::t2LSLrr:
@@ -8766,7 +8909,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
;
// If we're parsing Thumb1, reject it completely.
if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
- return Match_MnemonicFail;
+ return Match_RequiresFlagSetting;
// If we're parsing Thumb2, which form is legal depends on whether we're
// in an IT block.
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
@@ -8807,6 +8950,132 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
}
+// Returns true if Inst is unpredictable when it is in an IT block but is not
+// the last instruction in the block.
+bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // All branch & call instructions terminate IT blocks.
+ if (MCID.isTerminator() || MCID.isCall() || MCID.isReturn() ||
+ MCID.isBranch() || MCID.isIndirectBranch())
+ return true;
+
+ // Any arithmetic instruction which writes to the PC also terminates the IT
+ // block.
+ for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) {
+ MCOperand &Op = Inst.getOperand(OpIdx);
+ if (Op.isReg() && Op.getReg() == ARM::PC)
+ return true;
+ }
+
+ if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI))
+ return true;
+
+ // Instructions with variable operand lists can also write to their
+ // variable operands. We only care about Thumb instructions here, as ARM
+ // instructions obviously can't be in an IT block.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ if (listContainsReg(Inst, 3, ARM::PC))
+ return true;
+ break;
+ case ARM::tPOP:
+ if (listContainsReg(Inst, 2, ARM::PC))
+ return true;
+ break;
+ }
+
+ return false;
+}
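// For example, "bxeq lr" or "popeq {r4, pc}" may only appear as the last
// instruction of an IT block, so once one of these is pended the implicit
// block has to be flushed straight away.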
+
+unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm,
+ bool &EmitInITBlock,
+ MCStreamer &Out) {
+ // If we can't use an implicit IT block here, just match as normal.
+ if (inExplicitITBlock() || !isThumbTwo() || !useImplicitITThumb())
+ return MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ // Try to match the instruction in an extension of the current IT block (if
+ // there is one).
+ if (inImplicitITBlock()) {
+ extendImplicitITBlock(ITState.Cond);
+ if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) ==
+ Match_Success) {
+ // The match succeeded, but we still have to check that the instruction is
+ // valid in this implicit IT block.
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ARMCC::CondCodes InstCond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ ARMCC::CondCodes ITCond = currentITCond();
+ if (InstCond == ITCond) {
+ EmitInITBlock = true;
+ return Match_Success;
+ } else if (InstCond == ARMCC::getOppositeCondition(ITCond)) {
+ invertCurrentITCondition();
+ EmitInITBlock = true;
+ return Match_Success;
+ }
+ }
+ }
+ rewindImplicitITPosition();
+ }
+
+ // Finish the current IT block, and try to match outside any IT block.
+ flushPendingInstructions(Out);
+ unsigned PlainMatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ if (PlainMatchResult == Match_Success) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ARMCC::CondCodes InstCond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ // Some forms of the branch instruction have their own condition code
+ // fields, so can be conditionally executed without an IT block.
+ if (Inst.getOpcode() == ARM::tBcc || Inst.getOpcode() == ARM::t2Bcc) {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ if (InstCond == ARMCC::AL) {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ } else {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ }
+
+ // Try to match in a new IT block. The matcher doesn't check the actual
+ // condition, so we create an IT block with a dummy condition, and fix it up
+ // once we know the actual condition.
+ startImplicitITBlock();
+ if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) ==
+ Match_Success) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ITState.Cond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ EmitInITBlock = true;
+ return Match_Success;
+ }
+ }
+ discardImplicitITBlock();
+
+ // If none of these succeed, return the error we got when trying to match
+ // outside any IT blocks.
+ EmitInITBlock = false;
+ return PlainMatchResult;
+}
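// Sketch of the net effect when implicit IT blocks are enabled: the input
//   addeq r0, r0, #1
//   subne r1, r1, #1
// assembles as if written
//   ite   eq
//   addeq r0, r0, #1
//   subne r1, r1, #1
// The first conditional instruction opens a block under a dummy condition
// that is fixed up after matching; the opposite-condition follower folds
// in via invertCurrentITCondition(), turning the IT into an ITE.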
+
static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
@@ -8814,9 +9083,11 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
+ bool PendConditionalInstruction = false;
+
+ MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
+ PendConditionalInstruction, Out);
- MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
switch (MatchResult) {
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
@@ -8856,7 +9127,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ if (PendConditionalInstruction) {
+ PendingConditionalInsts.push_back(Inst);
+ if (isITBlockFull() || isITBlockTerminator(Inst))
+ flushPendingInstructions(Out);
+ } else {
+ Out.EmitInstruction(Inst, getSTI());
+ }
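// An IT block covers at most four instructions, so e.g. a run of five
//   addeq rN, rN, #1
// instructions is emitted as an "itttt eq" block of four, with the fifth
// opening a fresh implicit "it eq" block via the isITBlockFull() check.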
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
@@ -8898,6 +9175,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires Thumb2");
case Match_RequiresV8:
return Error(IDLoc, "instruction variant requires ARMv8 or later");
+ case Match_RequiresFlagSetting:
+ return Error(IDLoc, "no flag-preserving variant of this instruction available");
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
@@ -8958,78 +9237,79 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- return parseLiteralValues(4, DirectiveID.getLoc());
+ parseLiteralValues(4, DirectiveID.getLoc());
else if (IDVal == ".short" || IDVal == ".hword")
- return parseLiteralValues(2, DirectiveID.getLoc());
+ parseLiteralValues(2, DirectiveID.getLoc());
else if (IDVal == ".thumb")
- return parseDirectiveThumb(DirectiveID.getLoc());
+ parseDirectiveThumb(DirectiveID.getLoc());
else if (IDVal == ".arm")
- return parseDirectiveARM(DirectiveID.getLoc());
+ parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
- return parseDirectiveThumbFunc(DirectiveID.getLoc());
+ parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
- return parseDirectiveCode(DirectiveID.getLoc());
+ parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
- return parseDirectiveSyntax(DirectiveID.getLoc());
+ parseDirectiveSyntax(DirectiveID.getLoc());
else if (IDVal == ".unreq")
- return parseDirectiveUnreq(DirectiveID.getLoc());
+ parseDirectiveUnreq(DirectiveID.getLoc());
else if (IDVal == ".fnend")
- return parseDirectiveFnEnd(DirectiveID.getLoc());
+ parseDirectiveFnEnd(DirectiveID.getLoc());
else if (IDVal == ".cantunwind")
- return parseDirectiveCantUnwind(DirectiveID.getLoc());
+ parseDirectiveCantUnwind(DirectiveID.getLoc());
else if (IDVal == ".personality")
- return parseDirectivePersonality(DirectiveID.getLoc());
+ parseDirectivePersonality(DirectiveID.getLoc());
else if (IDVal == ".handlerdata")
- return parseDirectiveHandlerData(DirectiveID.getLoc());
+ parseDirectiveHandlerData(DirectiveID.getLoc());
else if (IDVal == ".setfp")
- return parseDirectiveSetFP(DirectiveID.getLoc());
+ parseDirectiveSetFP(DirectiveID.getLoc());
else if (IDVal == ".pad")
- return parseDirectivePad(DirectiveID.getLoc());
+ parseDirectivePad(DirectiveID.getLoc());
else if (IDVal == ".save")
- return parseDirectiveRegSave(DirectiveID.getLoc(), false);
+ parseDirectiveRegSave(DirectiveID.getLoc(), false);
else if (IDVal == ".vsave")
- return parseDirectiveRegSave(DirectiveID.getLoc(), true);
+ parseDirectiveRegSave(DirectiveID.getLoc(), true);
else if (IDVal == ".ltorg" || IDVal == ".pool")
- return parseDirectiveLtorg(DirectiveID.getLoc());
+ parseDirectiveLtorg(DirectiveID.getLoc());
else if (IDVal == ".even")
- return parseDirectiveEven(DirectiveID.getLoc());
+ parseDirectiveEven(DirectiveID.getLoc());
else if (IDVal == ".personalityindex")
- return parseDirectivePersonalityIndex(DirectiveID.getLoc());
+ parseDirectivePersonalityIndex(DirectiveID.getLoc());
else if (IDVal == ".unwind_raw")
- return parseDirectiveUnwindRaw(DirectiveID.getLoc());
+ parseDirectiveUnwindRaw(DirectiveID.getLoc());
else if (IDVal == ".movsp")
- return parseDirectiveMovSP(DirectiveID.getLoc());
+ parseDirectiveMovSP(DirectiveID.getLoc());
else if (IDVal == ".arch_extension")
- return parseDirectiveArchExtension(DirectiveID.getLoc());
+ parseDirectiveArchExtension(DirectiveID.getLoc());
else if (IDVal == ".align")
- return parseDirectiveAlign(DirectiveID.getLoc());
+ return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure.
else if (IDVal == ".thumb_set")
- return parseDirectiveThumbSet(DirectiveID.getLoc());
-
- if (!IsMachO && !IsCOFF) {
+ parseDirectiveThumbSet(DirectiveID.getLoc());
+ else if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
- return parseDirectiveArch(DirectiveID.getLoc());
+ parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".cpu")
- return parseDirectiveCPU(DirectiveID.getLoc());
+ parseDirectiveCPU(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
- return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ parseDirectiveEabiAttr(DirectiveID.getLoc());
else if (IDVal == ".fpu")
- return parseDirectiveFPU(DirectiveID.getLoc());
+ parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
- return parseDirectiveFnStart(DirectiveID.getLoc());
+ parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".inst")
- return parseDirectiveInst(DirectiveID.getLoc());
+ parseDirectiveInst(DirectiveID.getLoc());
else if (IDVal == ".inst.n")
- return parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ parseDirectiveInst(DirectiveID.getLoc(), 'n');
else if (IDVal == ".inst.w")
- return parseDirectiveInst(DirectiveID.getLoc(), 'w');
+ parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".object_arch")
- return parseDirectiveObjectArch(DirectiveID.getLoc());
+ parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".tlsdescseq")
- return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
- }
-
- return true;
+ parseDirectiveTLSDescSeq(DirectiveID.getLoc());
+ else
+ return true;
+ } else
+ return true;
+ return false;
}
/// parseLiteralValues
@@ -9037,47 +9317,22 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ::= .short expression [, expression]*
/// ::= .word expression [, expression]*
bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().parseExpression(Value)) {
- Parser.eatToEndOfStatement();
- return false;
- }
-
- getParser().getStreamer().EmitValue(Value, Size, L);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
- }
- }
-
- Parser.Lex();
- return false;
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
}
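// Accepted inputs now include, for example:
//   .word  0x11223344, sym + 4
//   .short 1, 2, 3
// parseMany (a generic MCAsmParser helper) is assumed to apply parseOne to
// each comma-separated expression and consume the end-of-statement token.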
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
-
- if (!hasThumb()) {
- Error(L, "target does not support Thumb mode");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
+ check(!hasThumb(), L, "target does not support Thumb mode"))
+ return true;
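// check(Cond, Loc, Msg) is the generic parser helper leaned on throughout
// this change: it is assumed to report Error(Loc, Msg) and return true
// when Cond holds, which lets ordering checks chain with ||.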
if (!isThumb())
SwitchMode();
@@ -9089,26 +9344,20 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
-
- if (!hasARM()) {
- Error(L, "target does not support ARM mode");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
+ check(!hasARM(), L, "target does not support ARM mode"))
+ return true;
if (isThumb())
SwitchMode();
-
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ // We need to flush the current implicit IT block on a label, because it is
+ // not legal to branch into an IT block.
+ flushPendingInstructions(getStreamer());
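// For example, without this flush the two conditional adds in
//   addeq r0, r0, #1
// label:
//   addeq r1, r1, #1
// could be folded into one implicit IT block spanning 'label', and a
// branch to 'label' would then land inside the block.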
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
@@ -9124,27 +9373,24 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
// Darwin asm has (optionally) function name after .thumb_func directive
// ELF doesn't
- if (IsMachO) {
- const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::EndOfStatement)) {
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) {
- Error(L, "unexpected token in .thumb_func directive");
- return false;
- }
- MCSymbol *Func =
- getParser().getContext().getOrCreateSymbol(Tok.getIdentifier());
+ if (IsMachO) {
+ if (Parser.getTok().is(AsmToken::Identifier) ||
+ Parser.getTok().is(AsmToken::String)) {
+ MCSymbol *Func = getParser().getContext().getOrCreateSymbol(
+ Parser.getTok().getIdentifier());
getParser().getStreamer().EmitThumbFunc(Func);
- Parser.Lex(); // Consume the identifier token.
+ Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.thumb_func' directive"))
+ return true;
return false;
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.thumb_func' directive"))
+ return true;
NextSymbolIsThumb = true;
return false;
@@ -9161,21 +9407,13 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
}
StringRef Mode = Tok.getString();
- if (Mode == "unified" || Mode == "UNIFIED") {
- Parser.Lex();
- } else if (Mode == "divided" || Mode == "DIVIDED") {
- Error(L, "'.syntax divided' arm asssembly not supported");
- return false;
- } else {
- Error(L, "unrecognized syntax mode in .syntax directive");
- return false;
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- return false;
- }
Parser.Lex();
+ if (check(Mode == "divided" || Mode == "DIVIDED", L,
+ "'.syntax divided' arm assembly not supported") ||
+ check(Mode != "unified" && Mode != "UNIFIED", L,
+ "unrecognized syntax mode in .syntax directive") ||
+ parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
// TODO tell the MC streamer the mode
// getParser().getStreamer().Emit???();
@@ -9187,10 +9425,8 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Integer)) {
- Error(L, "unexpected token in .code directive");
- return false;
- }
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
int64_t Val = Parser.getTok().getIntVal();
if (Val != 16 && Val != 32) {
Error(L, "invalid operand to .code directive");
@@ -9198,26 +9434,19 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
}
Parser.Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- return false;
- }
- Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
if (Val == 16) {
- if (!hasThumb()) {
- Error(L, "target does not support Thumb mode");
- return false;
- }
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
} else {
- if (!hasARM()) {
- Error(L, "target does not support ARM mode");
- return false;
- }
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
if (isThumb())
SwitchMode();
@@ -9234,25 +9463,15 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Eat the '.req' token.
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
- if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
- Parser.eatToEndOfStatement();
- Error(SRegLoc, "register name expected");
- return false;
- }
-
- // Shouldn't be anything else.
- if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- Error(Parser.getTok().getLoc(), "unexpected input in .req directive.");
- return false;
- }
-
- Parser.Lex(); // Consume the EndOfStatement
+ if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc,
+ "register name expected") ||
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected input in .req directive."))
+ return true;
- if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) {
- Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
- return false;
- }
+ if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg)
+ return Error(SRegLoc,
+ "redefinition of '" + Name + "' does not match original.");
return false;
}
@@ -9261,13 +9480,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected input in .unreq directive.");
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(L, "unexpected input in .unreq directive.");
RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
Parser.Lex(); // Eat the identifier.
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected input in '.unreq' directive"))
+ return true;
return false;
}
@@ -9300,13 +9519,10 @@ void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) {
/// ::= .arch token
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
-
unsigned ID = ARM::parseArch(Arch);
- if (ID == ARM::AK_INVALID) {
- Error(L, "Unknown arch name");
- return false;
- }
+ if (ID == ARM::AK_INVALID)
+ return Error(L, "Unknown arch name");
bool WasThumb = isThumb();
Triple T;
@@ -9332,7 +9548,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
Tag = ARMBuildAttrs::AttrTypeFromString(Name);
if (Tag == -1) {
Error(TagLoc, "attribute name not recognised: " + Name);
- Parser.eatToEndOfStatement();
return false;
}
Parser.Lex();
@@ -9340,27 +9555,18 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
const MCExpr *AttrExpr;
TagLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(AttrExpr)) {
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.parseExpression(AttrExpr))
+ return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
- if (!CE) {
- Error(TagLoc, "expected numeric constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (check(!CE, TagLoc, "expected numeric constant"))
+ return true;
Tag = CE->getValue();
}
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex(); // skip comma
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
StringRef StringValue = "";
bool IsStringValue = false;
@@ -9383,44 +9589,32 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
if (IsIntegerValue) {
const MCExpr *ValueExpr;
SMLoc ValueExprLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(ValueExpr)) {
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.parseExpression(ValueExpr))
+ return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
- if (!CE) {
- Error(ValueExprLoc, "expected numeric constant");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (!CE)
+ return Error(ValueExprLoc, "expected numeric constant");
IntegerValue = CE->getValue();
}
if (Tag == ARMBuildAttrs::compatibility) {
- if (Parser.getTok().isNot(AsmToken::Comma))
- IsStringValue = false;
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- Parser.eatToEndOfStatement();
- return false;
- } else {
- Parser.Lex();
- }
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
}
if (IsStringValue) {
- if (Parser.getTok().isNot(AsmToken::String)) {
- Error(Parser.getTok().getLoc(), "bad string constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::String))
+ return Error(Parser.getTok().getLoc(), "bad string constant");
StringValue = Parser.getTok().getStringContents();
Parser.Lex();
}
+ if (Parser.parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.eabi_attribute' directive"))
+ return true;
+
if (IsIntegerValue && IsStringValue) {
assert(Tag == ARMBuildAttrs::compatibility);
getTargetStreamer().emitIntTextAttribute(Tag, IntegerValue, StringValue);
@@ -9439,10 +9633,8 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is using table-gen data, but should be moved to
// ARMTargetParser once that is table-gen'd.
- if (!getSTI().isCPUStringValid(CPU)) {
- Error(L, "Unknown CPU name");
- return false;
- }
+ if (!getSTI().isCPUStringValid(CPU))
+ return Error(L, "Unknown CPU name");
bool WasThumb = isThumb();
MCSubtargetInfo &STI = copySTI();
@@ -9459,11 +9651,9 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = ARM::parseFPU(FPU);
- std::vector<const char *> Features;
- if (!ARM::getFPUFeatures(ID, Features)) {
- Error(FPUNameLoc, "Unknown FPU name");
- return false;
- }
+ std::vector<StringRef> Features;
+ if (!ARM::getFPUFeatures(ID, Features))
+ return Error(FPUNameLoc, "Unknown FPU name");
MCSubtargetInfo &STI = copySTI();
for (auto Feature : Features)
@@ -9477,10 +9667,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.fnstart' directive"))
+ return true;
+
if (UC.hasFnStart()) {
Error(L, ".fnstart starts before the end of previous one");
UC.emitFnStartLocNotes();
- return false;
+ return true;
}
// Reset the unwind directives parser state
@@ -9495,11 +9689,12 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
/// parseDirectiveFnEnd
/// ::= .fnend
bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.fnend' directive"))
+ return true;
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .fnend directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .fnend directive");
// Reset the unwind directives parser state
getTargetStreamer().emitFnEnd();
@@ -9511,22 +9706,24 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
/// parseDirectiveCantUnwind
/// ::= .cantunwind
bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
- UC.recordCantUnwind(L);
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cantunwind' directive"))
+ return true;
+ UC.recordCantUnwind(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .cantunwind directive");
- return false;
- }
+ if (check(!UC.hasFnStart(), L, ".fnstart must precede .cantunwind directive"))
+ return true;
+
if (UC.hasHandlerData()) {
Error(L, ".cantunwind can't be used with .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (UC.hasPersonality()) {
Error(L, ".cantunwind can't be used with .personality directive");
UC.emitPersonalityLocNotes();
- return false;
+ return true;
}
getTargetStreamer().emitCantUnwind();
@@ -9539,38 +9736,36 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
+ // Parse the name of the personality routine
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(L, "unexpected input in .personality directive.");
+ StringRef Name(Parser.getTok().getIdentifier());
+ Parser.Lex();
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.personality' directive"))
+ return true;
+
UC.recordPersonality(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .personality directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .personality directive");
if (UC.cantUnwind()) {
Error(L, ".personality can't be used with .cantunwind directive");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
if (UC.hasHandlerData()) {
Error(L, ".personality must precede .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (HasExistingPersonality) {
- Parser.eatToEndOfStatement();
Error(L, "multiple personality directives");
UC.emitPersonalityLocNotes();
- return false;
- }
-
- // Parse the name of the personality routine
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected input in .personality directive.");
- return false;
+ return true;
}
- StringRef Name(Parser.getTok().getIdentifier());
- Parser.Lex();
MCSymbol *PR = getParser().getContext().getOrCreateSymbol(Name);
getTargetStreamer().emitPersonality(PR);
@@ -9580,17 +9775,18 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
/// parseDirectiveHandlerData
/// ::= .handlerdata
bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
- UC.recordHandlerData(L);
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.handlerdata' directive"))
+ return true;
+ UC.recordHandlerData(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .personality directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .personality directive");
if (UC.cantUnwind()) {
Error(L, ".handlerdata can't be used with .cantunwind directive");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
getTargetStreamer().emitHandlerData();
@@ -9602,74 +9798,52 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .setfp directive");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".setfp must precede .handlerdata directive");
- return false;
- }
+ if (check(!UC.hasFnStart(), L, ".fnstart must precede .setfp directive") ||
+ check(UC.hasHandlerData(), L,
+ ".setfp must precede .handlerdata directive"))
+ return true;
// Parse fpreg
SMLoc FPRegLoc = Parser.getTok().getLoc();
int FPReg = tryParseRegister();
- if (FPReg == -1) {
- Error(FPRegLoc, "frame pointer register expected");
- return false;
- }
- // Consume comma
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- return false;
- }
- Parser.Lex(); // skip comma
+ if (check(FPReg == -1, FPRegLoc, "frame pointer register expected") ||
+ Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
// Parse spreg
SMLoc SPRegLoc = Parser.getTok().getLoc();
int SPReg = tryParseRegister();
- if (SPReg == -1) {
- Error(SPRegLoc, "stack pointer register expected");
- return false;
- }
-
- if (SPReg != ARM::SP && SPReg != UC.getFPReg()) {
- Error(SPRegLoc, "register should be either $sp or the latest fp register");
- return false;
- }
+ if (check(SPReg == -1, SPRegLoc, "stack pointer register expected") ||
+ check(SPReg != ARM::SP && SPReg != UC.getFPReg(), SPRegLoc,
+ "register should be either $sp or the latest fp register"))
+ return true;
// Update the frame pointer register
UC.saveFPReg(FPReg);
// Parse offset
int64_t Offset = 0;
- if (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // skip comma
-
+ if (Parser.parseOptionalToken(AsmToken::Comma)) {
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return false;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // skip hash token.
const MCExpr *OffsetExpr;
SMLoc ExLoc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().parseExpression(OffsetExpr, EndLoc)) {
- Error(ExLoc, "malformed setfp offset");
- return false;
- }
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed setfp offset");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(ExLoc, "setfp offset must be an immediate");
- return false;
- }
-
+ if (check(!CE, ExLoc, "setfp offset must be an immediate"))
+ return true;
Offset = CE->getValue();
}
+ if (Parser.parseToken(AsmToken::EndOfStatement))
+ return true;
+
getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg),
static_cast<unsigned>(SPReg), Offset);
return false;
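// Accepted forms, for example:
//   .setfp fp, sp
//   .setfp fp, sp, #8
// where the optional offset must be a '#'- or '$'-prefixed constant.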
@@ -9680,35 +9854,29 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
bool ARMAsmParser::parseDirectivePad(SMLoc L) {
MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .pad directive");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".pad must precede .handlerdata directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .pad directive");
+ if (UC.hasHandlerData())
+ return Error(L, ".pad must precede .handlerdata directive");
// Parse the offset
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return false;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // skip hash token.
const MCExpr *OffsetExpr;
SMLoc ExLoc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().parseExpression(OffsetExpr, EndLoc)) {
- Error(ExLoc, "malformed pad offset");
- return false;
- }
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed pad offset");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(ExLoc, "pad offset must be an immediate");
- return false;
- }
+ if (!CE)
+ return Error(ExLoc, "pad offset must be an immediate");
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.pad' directive"))
+ return true;
getTargetStreamer().emitPad(CE->getValue());
return false;
@@ -9719,30 +9887,23 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
/// ::= .vsave { registers }
bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .save or .vsave directives");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".save or .vsave must precede .handlerdata directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .save or .vsave directives");
+ if (UC.hasHandlerData())
+ return Error(L, ".save or .vsave must precede .handlerdata directive");
// RAII object to make sure parsed operands are deleted.
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(Operands))
- return false;
+ if (parseRegisterList(Operands) ||
+ parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
ARMOperand &Op = (ARMOperand &)*Operands[0];
- if (!IsVector && !Op.isRegList()) {
- Error(L, ".save expects GPR registers");
- return false;
- }
- if (IsVector && !Op.isDPRRegList()) {
- Error(L, ".vsave expects DPR registers");
- return false;
- }
+ if (!IsVector && !Op.isRegList())
+ return Error(L, ".save expects GPR registers");
+ if (IsVector && !Op.isDPRRegList())
+ return Error(L, ".vsave expects DPR registers");
getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
return false;
@@ -9753,8 +9914,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
- MCAsmParser &Parser = getParser();
- int Width;
+ int Width = 4;
if (isThumb()) {
switch (Suffix) {
@@ -9762,96 +9922,68 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
Width = 2;
break;
case 'w':
- Width = 4;
break;
default:
- Parser.eatToEndOfStatement();
- Error(Loc, "cannot determine Thumb instruction size, "
- "use inst.n/inst.w instead");
- return false;
+ return Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
}
} else {
- if (Suffix) {
- Parser.eatToEndOfStatement();
- Error(Loc, "width suffixes are invalid in ARM mode");
- return false;
- }
- Width = 4;
- }
-
- if (getLexer().is(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- Error(Loc, "expected expression following directive");
- return false;
+ if (Suffix)
+ return Error(Loc, "width suffixes are invalid in ARM mode");
}
- for (;;) {
+ auto parseOne = [&]() -> bool {
const MCExpr *Expr;
-
- if (getParser().parseExpression(Expr)) {
- Error(Loc, "expected expression");
- return false;
- }
-
+ if (getParser().parseExpression(Expr))
+ return true;
const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
if (!Value) {
- Error(Loc, "expected constant expression");
- return false;
+ return Error(Loc, "expected constant expression");
}
switch (Width) {
case 2:
- if (Value->getValue() > 0xffff) {
- Error(Loc, "inst.n operand is too big, use inst.w instead");
- return false;
- }
+ if (Value->getValue() > 0xffff)
+ return Error(Loc, "inst.n operand is too big, use inst.w instead");
break;
case 4:
- if (Value->getValue() > 0xffffffff) {
- Error(Loc,
- StringRef(Suffix ? "inst.w" : "inst") + " operand is too big");
- return false;
- }
+ if (Value->getValue() > 0xffffffff)
+ return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") +
+ " operand is too big");
break;
default:
llvm_unreachable("only supported widths are 2 and 4");
}
getTargetStreamer().emitInst(Value->getValue(), Suffix);
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(Loc, "unexpected token in directive");
- return false;
- }
-
- Parser.Lex();
- }
-
- Parser.Lex();
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return Error(Loc, "expected expression following directive");
+ if (parseMany(parseOne))
+ return true;
return false;
}
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
}
bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
- const MCSection *Section = getStreamer().getCurrentSection().first;
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- TokError("unexpected token in directive");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
if (!Section) {
getStreamer().InitSections(false);
- Section = getStreamer().getCurrentSection().first;
+ Section = getStreamer().getCurrentSectionOnly();
}
assert(Section && "must have section to emit alignment");
@@ -9869,51 +10001,41 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
+ const MCExpr *IndexExpression;
+ SMLoc IndexLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(IndexExpression) ||
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.personalityindex' directive")) {
+ return true;
+ }
+
UC.recordPersonalityIndex(L);
if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .personalityindex directive");
- return false;
+ return Error(L, ".fnstart must precede .personalityindex directive");
}
if (UC.cantUnwind()) {
- Parser.eatToEndOfStatement();
Error(L, ".personalityindex cannot be used with .cantunwind");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
if (UC.hasHandlerData()) {
- Parser.eatToEndOfStatement();
Error(L, ".personalityindex must precede .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (HasExistingPersonality) {
- Parser.eatToEndOfStatement();
Error(L, "multiple personality directives");
UC.emitPersonalityLocNotes();
- return false;
- }
-
- const MCExpr *IndexExpression;
- SMLoc IndexLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(IndexExpression)) {
- Parser.eatToEndOfStatement();
- return false;
+ return true;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IndexExpression);
- if (!CE) {
- Parser.eatToEndOfStatement();
- Error(IndexLoc, "index must be a constant number");
- return false;
- }
- if (CE->getValue() < 0 ||
- CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) {
- Parser.eatToEndOfStatement();
- Error(IndexLoc, "personality routine index should be in range [0-3]");
- return false;
- }
+ if (!CE)
+ return Error(IndexLoc, "index must be a constant number");
+ if (CE->getValue() < 0 || CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX)
+ return Error(IndexLoc,
+ "personality routine index should be in range [0-3]");
getTargetStreamer().emitPersonalityIndex(CE->getValue());
return false;
@@ -9923,81 +10045,51 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
/// ::= .unwind_raw offset, opcode [, opcode...]
bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .unwind_raw directives");
- return false;
- }
-
int64_t StackOffset;
-
const MCExpr *OffsetExpr;
SMLoc OffsetLoc = getLexer().getLoc();
- if (getLexer().is(AsmToken::EndOfStatement) ||
- getParser().parseExpression(OffsetExpr)) {
- Error(OffsetLoc, "expected expression");
- Parser.eatToEndOfStatement();
- return false;
- }
+
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .unwind_raw directives");
+ if (getParser().parseExpression(OffsetExpr))
+ return Error(OffsetLoc, "expected expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(OffsetLoc, "offset must be a constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (!CE)
+ return Error(OffsetLoc, "offset must be a constant");
StackOffset = CE->getValue();
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(getLexer().getLoc(), "expected comma");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex();
+ if (Parser.parseToken(AsmToken::Comma, "expected comma"))
+ return true;
SmallVector<uint8_t, 16> Opcodes;
- for (;;) {
- const MCExpr *OE;
+ auto parseOne = [&]() -> bool {
+ const MCExpr *OE;
SMLoc OpcodeLoc = getLexer().getLoc();
- if (getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE)) {
- Error(OpcodeLoc, "expected opcode expression");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (check(getLexer().is(AsmToken::EndOfStatement) ||
+ Parser.parseExpression(OE),
+ OpcodeLoc, "expected opcode expression"))
+ return true;
const MCConstantExpr *OC = dyn_cast<MCConstantExpr>(OE);
- if (!OC) {
- Error(OpcodeLoc, "opcode value must be a constant");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (!OC)
+ return Error(OpcodeLoc, "opcode value must be a constant");
const int64_t Opcode = OC->getValue();
- if (Opcode & ~0xff) {
- Error(OpcodeLoc, "invalid opcode");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (Opcode & ~0xff)
+ return Error(OpcodeLoc, "invalid opcode");
Opcodes.push_back(uint8_t(Opcode));
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(getLexer().getLoc(), "unexpected token in directive");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- Parser.Lex();
- }
+ // Must have at least 1 element
+ SMLoc OpcodeLoc = getLexer().getLoc();
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return Error(OpcodeLoc, "expected opcode expression");
+ if (parseMany(parseOne))
+ return true;
getTargetStreamer().emitUnwindRaw(StackOffset, Opcodes);
-
- Parser.Lex();
return false;
}
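// For example (0xb0 being the ARM EHABI "finish" opcode):
//   .unwind_raw 4, 0xb0
// records a stack offset of 4 followed by one raw unwind opcode byte.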
@@ -10006,22 +10098,17 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- TokError("expected variable after '.tlsdescseq' directive");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected variable after '.tlsdescseq' directive");
const MCSymbolRefExpr *SRE =
MCSymbolRefExpr::create(Parser.getTok().getIdentifier(),
MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.tlsdescseq' directive"))
+ return true;
getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
return false;
@@ -10031,60 +10118,40 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
/// ::= .movsp reg [, #offset]
bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .movsp directives");
- return false;
- }
- if (UC.getFPReg() != ARM::SP) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected .movsp directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .movsp directives");
+ if (UC.getFPReg() != ARM::SP)
+ return Error(L, "unexpected .movsp directive");
SMLoc SPRegLoc = Parser.getTok().getLoc();
int SPReg = tryParseRegister();
- if (SPReg == -1) {
- Parser.eatToEndOfStatement();
- Error(SPRegLoc, "register expected");
- return false;
- }
-
- if (SPReg == ARM::SP || SPReg == ARM::PC) {
- Parser.eatToEndOfStatement();
- Error(SPRegLoc, "sp and pc are not permitted in .movsp directive");
- return false;
- }
+ if (SPReg == -1)
+ return Error(SPRegLoc, "register expected");
+ if (SPReg == ARM::SP || SPReg == ARM::PC)
+ return Error(SPRegLoc, "sp and pc are not permitted in .movsp directive");
int64_t Offset = 0;
- if (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex();
-
- if (Parser.getTok().isNot(AsmToken::Hash)) {
- Error(Parser.getTok().getLoc(), "expected #constant");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex();
+ if (Parser.parseOptionalToken(AsmToken::Comma)) {
+ if (Parser.parseToken(AsmToken::Hash, "expected #constant"))
+ return true;
const MCExpr *OffsetExpr;
SMLoc OffsetLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(OffsetExpr)) {
- Parser.eatToEndOfStatement();
- Error(OffsetLoc, "malformed offset expression");
- return false;
- }
+
+ if (Parser.parseExpression(OffsetExpr))
+ return Error(OffsetLoc, "malformed offset expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Parser.eatToEndOfStatement();
- Error(OffsetLoc, "offset must be an immediate constant");
- return false;
- }
+ if (!CE)
+ return Error(OffsetLoc, "offset must be an immediate constant");
Offset = CE->getValue();
}
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.movsp' directive"))
+ return true;
+
getTargetStreamer().emitMovSP(SPReg, Offset);
UC.saveFPReg(SPReg);
@@ -10095,11 +10162,8 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "unexpected token");
StringRef Arch = Parser.getTok().getString();
SMLoc ArchLoc = Parser.getTok().getLoc();
@@ -10107,19 +10171,12 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
unsigned ID = ARM::parseArch(Arch);
- if (ID == ARM::AK_INVALID) {
- Error(ArchLoc, "unknown architecture '" + Arch + "'");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (ID == ARM::AK_INVALID)
+ return Error(ArchLoc, "unknown architecture '" + Arch + "'");
+ if (parseToken(AsmToken::EndOfStatement))
+ return true;
getTargetStreamer().emitObjectArch(ID);
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- }
-
return false;
}
@@ -10128,18 +10185,17 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// NOTE: if this is not the end of the statement, fall back to the target
// agnostic handling for this directive which will correctly handle this.
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return true;
-
- // '.align' is target specifically handled to mean 2**2 byte alignment.
- const MCSection *Section = getStreamer().getCurrentSection().first;
- assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
- getStreamer().EmitCodeAlignment(4, 0);
- else
- getStreamer().EmitValueToAlignment(4, 0, 1, 0);
-
- return false;
+ if (parseOptionalToken(AsmToken::EndOfStatement)) {
+ // '.align' is target specifically handled to mean 2**2 byte alignment.
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
+ assert(Section && "must have section to emit alignment");
+ if (Section->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(4, 0);
+ else
+ getStreamer().EmitValueToAlignment(4, 0, 1, 0);
+ return false;
+ }
+ return true;
}
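// Behavior sketch: a bare ".align" is handled here as 2**2 = 4-byte
// alignment, while ".align <expr>" returns true and falls through to the
// generic directive parser (which on ARM typically treats the operand as
// a power of two as well).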
/// parseDirectiveThumbSet
@@ -10148,18 +10204,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
MCAsmParser &Parser = getParser();
StringRef Name;
- if (Parser.parseIdentifier(Name)) {
- TokError("expected identifier after '.thumb_set'");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- if (getLexer().isNot(AsmToken::Comma)) {
- TokError("expected comma after name '" + Name + "'");
- Parser.eatToEndOfStatement();
- return false;
- }
- Lex();
+ if (check(Parser.parseIdentifier(Name),
+ "expected identifier after '.thumb_set'") ||
+ parseToken(AsmToken::Comma, "expected comma after name '" + Name + "'"))
+ return true;
MCSymbol *Sym;
const MCExpr *Value;
@@ -10173,10 +10221,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
- RegisterMCAsmParser<ARMAsmParser> X(TheARMLETarget);
- RegisterMCAsmParser<ARMAsmParser> Y(TheARMBETarget);
- RegisterMCAsmParser<ARMAsmParser> A(TheThumbLETarget);
- RegisterMCAsmParser<ARMAsmParser> B(TheThumbBETarget);
+ RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
+ RegisterMCAsmParser<ARMAsmParser> Y(getTheARMBETarget());
+ RegisterMCAsmParser<ARMAsmParser> A(getTheThumbLETarget());
+ RegisterMCAsmParser<ARMAsmParser> B(getTheThumbBETarget());
}
#define GET_REGISTER_MATCHER
@@ -10218,16 +10266,17 @@ static const struct {
bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "expected architecture extension name");
StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.arch_extension' directive"))
+ return true;
+
bool EnableFeature = true;
if (Name.startswith_lower("no")) {
EnableFeature = false;
@@ -10235,20 +10284,19 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
}
unsigned FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
- Error(ExtLoc, "unknown architectural extension: " + Name);
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
for (const auto &Extension : Extensions) {
if (Extension.Kind != FeatureKind)
continue;
if (Extension.Features.none())
- report_fatal_error("unsupported architectural extension: " + Name);
+ return Error(ExtLoc, "unsupported architectural extension: " + Name);
- if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
- Error(ExtLoc, "architectural extension '" + Name + "' is not "
- "allowed for the current base architecture");
- return false;
- }
+ if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck)
+ return Error(ExtLoc, "architectural extension '" + Name +
+ "' is not allowed for the current base architecture");
MCSubtargetInfo &STI = copySTI();
FeatureBitset ToggleFeatures = EnableFeature
@@ -10261,9 +10309,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
- Error(ExtLoc, "unknown architectural extension: " + Name);
- Parser.eatToEndOfStatement();
- return false;
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
}
// Define this matcher function after the auto-generated include so we
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 3196a57ccc3e..ac3d8c780af2 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -861,13 +861,13 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
extern "C" void LLVMInitializeARMDisassembler() {
- TargetRegistry::RegisterMCDisassembler(TheARMLETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheARMBETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbLETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
createThumbDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbBETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
createThumbDisassembler);
}
@@ -1432,7 +1432,7 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
case ARM::STC_POST:
case ARM::STCL_POST:
imm |= U << 8;
- // fall through.
+ LLVM_FALLTHROUGH;
default:
// The 'option' variant doesn't encode 'U' in the immediate since
// the immediate is unsigned [0,255].
@@ -2555,6 +2555,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
break;
}
// Fall through to handle the register offset variant.
+ LLVM_FALLTHROUGH;
case ARM::VLD1d8wb_fixed:
case ARM::VLD1d16wb_fixed:
case ARM::VLD1d32wb_fixed:
@@ -4157,7 +4158,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
case 0x93: // faultmask_ns
if (!(FeatureBits[ARM::HasV8MMainlineOps]))
return MCDisassembler::Fail;
- // fall through
+ LLVM_FALLTHROUGH;
case 10: // msplim
case 11: // psplim
case 0x88: // msp_ns
@@ -5310,4 +5311,3 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
return S;
}
-
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e81bb77dbdfc..3667952d44c0 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -726,6 +726,12 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
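// The list operands are expected to arrive already sorted by encoding
// value, so e.g. "push {r0, r4, lr}" always prints in ascending order;
// the assert merely documents that invariant.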
+
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
if (i != OpNum)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 0fc758201d47..a58d5b34131b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -375,7 +375,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
@@ -387,7 +387,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_t2_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned i = (Value & 0x800) >> 11;
@@ -403,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
Value -= 4;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_ldst_pcrel_12: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value -= 4;
@@ -541,7 +541,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- if (Ctx && Value % 4 != 0) {
+ if (Ctx && Value % 4 != 0) {
Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
return 0;
}
@@ -578,6 +578,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Offset by 4, and don't encode the low two bits.
return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_thumb_cb: {
+ // CB instructions can only branch to offsets in [4, 126] in multiples of 2
+ // so ensure that the raw value LSB is zero and it lies in [2, 130].
+ // An offset of 2 will be relaxed to a NOP.
+ if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
// Offset by 4 and don't encode the lower bit, which is always 0.
// FIXME: diagnose if no Thumb2
uint32_t Binary = (Value - 4) >> 1;
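// Worked example: Value = 4 (the smallest encodable offset) gives
// Binary = 0, and Value = 0x82 gives (0x82 - 4) >> 1 = 0x3F, filling the
// 6-bit i:imm5 field assembled below.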
@@ -623,7 +630,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_pcrel_10: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -650,7 +657,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_9:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_pcrel_9: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -696,14 +703,16 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
- // Some fixups to thumb function symbols need the low bit (thumb bit)
- // twiddled.
- if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ // MachO (the only user of "Value") tries to make .o files that look vaguely
+ // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
+ // into the addend if possible. Other relocation types don't want this bit
+ // though (branches couldn't encode it if it *was* present, and no other
+ // relocations exist) and it can interfere with checking valid expressions.
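// e.g. a ".word thumb_fn" or "movw r0, :lower16:thumb_fn" aimed at a Thumb
// function gets the low bit folded into the addend so the stored value is
// a callable Thumb address; a BL/BLX fixup must not receive that bit.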
+ if ((unsigned)Fixup.getKind() == FK_Data_4 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) {
if (Sym) {
if (Asm.isThumbFunc(Sym))
Value |= 1;
@@ -1111,6 +1120,7 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
+ const MCTargetOptions &Options,
bool isLittle) {
switch (TheTriple.getObjectFormat()) {
default:
@@ -1131,24 +1141,28 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, true);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, false);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, true);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, false);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 4118fe8e8cdb..6f19754b899e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -140,6 +140,12 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_t2_movw_lo16:
Type = ELF::R_ARM_THM_MOVW_PREL_NC;
break;
+ case ARM::fixup_arm_thumb_br:
+ Type = ELF::R_ARM_THM_JUMP11;
+ break;
+ case ARM::fixup_arm_thumb_bcc:
+ Type = ELF::R_ARM_THM_JUMP8;
+ break;
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
switch (Modifier) {
@@ -221,6 +227,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_TLSDESC:
Type = ELF::R_ARM_TLS_GOTDESC;
break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_ARM_TLS_LDM32;
+ break;
case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ:
Type = ELF::R_ARM_TLS_DESCSEQ;
break;
@@ -239,10 +248,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
- Type = ELF::R_ARM_MOVT_ABS;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_MOVT_BREL;
+ break;
+ }
break;
case ARM::fixup_arm_movw_lo16:
- Type = ELF::R_ARM_MOVW_ABS_NC;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_MOVW_BREL_NC;
+ break;
+ }
break;
case ARM::fixup_t2_movt_hi16:
Type = ELF::R_ARM_THM_MOVT_ABS;
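[Editorial aside: condensed, the MOVW/MOVT selection added above reads as follows. A sketch that returns relocation names rather than ELF constants, not the writer's real interface.]

    enum class Mod { None, SBREL };
    // Sketch of the new mapping: the SBREL modifier selects the
    // static-base-relative relocations instead of the absolute ones.
    static const char *movRelocFor(bool IsMovt, Mod M) {
      if (M == Mod::SBREL)
        return IsMovt ? "R_ARM_MOVT_BREL" : "R_ARM_MOVW_BREL_NC";
      return IsMovt ? "R_ARM_MOVT_ABS" : "R_ARM_MOVW_ABS_NC";
    }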
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 36cb74765f3b..f6bb35d2326b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -591,7 +591,7 @@ private:
void FlushPendingOffset();
void FlushUnwindOpcodes(bool NoHandlerData);
- void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ void SwitchToEHSection(StringRef Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
void SwitchToExTabSection(const MCSymbol &FnStart);
void SwitchToExIdxSection(const MCSymbol &FnStart);
@@ -1074,7 +1074,7 @@ void ARMELFStreamer::reset() {
getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
-inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
unsigned Type,
unsigned Flags,
SectionKind Kind,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53cd29a6061e..1e062ad45af5 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -90,6 +90,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
+ CommentString = ";";
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 9fca13eeea93..559a4f8de75f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1493,7 +1493,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
case ARM_AM::lsl: SBits = 0x0; break;
case ARM_AM::lsr: SBits = 0x2; break;
case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::rrx: // FALLTHROUGH
+ case ARM_AM::rrx: LLVM_FALLTHROUGH;
case ARM_AM::ror: SBits = 0x6; break;
}
@@ -1545,8 +1545,15 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
else
Binary |= NumRegs * 2;
} else {
+ const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
+ assert(std::is_sorted(MI.begin() + Op, MI.end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
+
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg());
+ unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
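[Editorial aside: the loop above builds the usual 16-bit register-list mask; a self-contained sketch, with plain encoding values standing in for MCOperands.]

    #include <cassert>
    #include <vector>
    // Sketch: encoding values r0..r12, lr, pc map to bits 0..15 of the mask.
    static unsigned buildRegListMask(const std::vector<unsigned> &EncVals) {
      unsigned Mask = 0;
      for (unsigned V : EncVals) {
        assert(V < 16 && "encoding value out of range for a register list");
        Mask |= 1u << V;
      }
      return Mask;
    }
    // buildRegListMask({0, 1, 14}) == 0x4003, i.e. {r0, r1, lr}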
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index afb089ab0286..9e4d202321e6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -204,7 +204,8 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- T.getArch() == Triple::thumb);
+ (T.getArch() == Triple::thumb ||
+ T.getArch() == Triple::thumbeb));
}
static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
@@ -273,8 +274,8 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
- &TheThumbBETarget}) {
+ for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
+ &getTheThumbLETarget(), &getTheThumbBETarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
@@ -313,16 +314,18 @@ extern "C" void LLVMInitializeARMTargetMC() {
}
// Register the MC Code Emitter
- for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
- for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ for (Target *T : {&getTheARMBETarget(), &getTheThumbBETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARMBETarget, createARMBEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheThumbLETarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheARMLETarget(),
+ createARMLEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheARMBETarget(),
+ createARMBEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheThumbLETarget(),
createThumbLEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheThumbBETarget(),
createThumbBEAsmBackend);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c2bbc8e828c4..ba834201e585 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCStreamer;
+class MCTargetOptions;
class MCRelocationInfo;
class MCTargetStreamer;
class StringRef;
@@ -36,8 +37,10 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheARMLETarget, TheThumbLETarget;
-extern Target TheARMBETarget, TheThumbBETarget;
+Target &getTheARMLETarget();
+Target &getTheThumbLETarget();
+Target &getTheARMBETarget();
+Target &getTheThumbBETarget();
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
@@ -66,21 +69,26 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options,
bool IsLittleEndian);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createThumbLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createThumbBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
// object file.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index cfa6ce7da65e..b77181f29b2d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -208,7 +208,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
MovtBit = 1;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16:
ThumbBit = 1;
break;
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 7f2124033982..744761bcddb8 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -43,7 +43,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM MLA / MLS expansion pass";
}
@@ -334,18 +334,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
unsigned Skip = 0;
MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
while (MII != E) {
- MachineInstr *MI = &*MII;
+ MachineInstr *MI = &*MII++;
- if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) {
- ++MII;
+ if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
continue;
- }
const MCInstrDesc &MCID = MI->getDesc();
if (MI->isBarrier()) {
clearStack();
Skip = 0;
- ++MII;
continue;
}
@@ -365,13 +362,9 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
pushStack(MI);
else {
ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
- E = MBB.rend(); // May have changed if MI was the 1st instruction.
Changed = true;
- continue;
}
}
-
- ++MII;
}
return Changed;
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 3f88eb818062..caa69f8d71b7 100644
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -11,17 +11,31 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheARMLETarget, llvm::TheARMBETarget;
-Target llvm::TheThumbLETarget, llvm::TheThumbBETarget;
+Target &llvm::getTheARMLETarget() {
+ static Target TheARMLETarget;
+ return TheARMLETarget;
+}
+Target &llvm::getTheARMBETarget() {
+ static Target TheARMBETarget;
+ return TheARMBETarget;
+}
+Target &llvm::getTheThumbLETarget() {
+ static Target TheThumbLETarget;
+ return TheThumbLETarget;
+}
+Target &llvm::getTheThumbBETarget() {
+ static Target TheThumbBETarget;
+ return TheThumbBETarget;
+}
extern "C" void LLVMInitializeARMTargetInfo() {
- RegisterTarget<Triple::arm, /*HasJIT=*/true>
- X(TheARMLETarget, "arm", "ARM");
- RegisterTarget<Triple::armeb, /*HasJIT=*/true>
- Y(TheARMBETarget, "armeb", "ARM (big endian)");
+ RegisterTarget<Triple::arm, /*HasJIT=*/true> X(getTheARMLETarget(), "arm",
+ "ARM");
+ RegisterTarget<Triple::armeb, /*HasJIT=*/true> Y(getTheARMBETarget(), "armeb",
+ "ARM (big endian)");
- RegisterTarget<Triple::thumb, /*HasJIT=*/true>
- A(TheThumbLETarget, "thumb", "Thumb");
- RegisterTarget<Triple::thumbeb, /*HasJIT=*/true>
- B(TheThumbBETarget, "thumbeb", "Thumb (big endian)");
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true> A(getTheThumbLETarget(),
+ "thumb", "Thumb");
+ RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> B(
+ getTheThumbBETarget(), "thumbeb", "Thumb (big endian)");
}
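[Editorial aside: the accessors introduced here are classic function-local-static ("Meyers") singletons. A generic sketch of the pattern, not the LLVM types: construction happens on first use, so registry consumers can never observe an unconstructed Target during cross-TU static initialization.]

    // Generic sketch: the static local is constructed the first time
    // control passes through the function, sidestepping static
    // initialization order problems between translation units.
    struct Widget { int State = 0; };
    static Widget &getTheWidget() {
      static Widget W; // constructed once, on first call
      return W;
    }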
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c0732e4b750a..9953c61cd89c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -26,8 +26,8 @@ Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
: ARMFrameLowering(sti) {}
bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned CFSize = MFI.getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
// stack frame. ARM (especially Thumb) has small immediate offset to
// address the stack frame. So a large call frame can cause poor codegen
@@ -35,7 +35,7 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
return false;
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MFI.hasVarSizedObjects();
}
static void emitSPUpdate(MachineBasicBlock &MBB,
@@ -85,7 +85,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -95,10 +95,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- unsigned NumBytes = MFI->getStackSize();
+ unsigned NumBytes = MFI.getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -110,7 +110,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
NumBytes = (NumBytes + 3) & ~3;
- MFI->setStackSize(NumBytes);
+ MFI.setStackSize(NumBytes);
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
@@ -121,7 +121,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
CFAOffset -= ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -133,7 +133,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
CFAOffset -= NumBytes - ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -150,11 +150,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R9:
case ARM::R10:
case ARM::R11:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -179,7 +179,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
bool HasFP = hasFP(MF);
if (HasFP)
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
@@ -188,7 +188,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
- if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+ if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
@@ -196,7 +197,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (adjustedGPRCS1Size) {
CFAOffset -= adjustedGPRCS1Size;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -212,7 +213,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop())
+ if (STI.splitFramePushPop(MF))
break;
// fallthrough
case ARM::R0:
@@ -224,8 +225,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R6:
case ARM::R7:
case ARM::LR:
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -236,20 +237,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Adjust FP so it point to the stack slot that contains the previous FP.
if (HasFP) {
FramePtrOffsetInBlock +=
- MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if(FramePtrOffsetInBlock) {
CFAOffset += FramePtrOffsetInBlock;
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -261,13 +262,55 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
+ // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
+ // and tMOVr instructions. We don't need to add any call frame information
+ // in-between these instructions, because they do not modify the high
+ // registers.
+ while (true) {
+ MachineBasicBlock::iterator OldMBBI = MBBI;
+ // Skip a run of tMOVr instructions
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
+ MBBI++;
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ MBBI++;
+ } else {
+ // We have reached an instruction which is not a push, so the previous
+ // run of tMOVr instructions (which may have been empty) was not part of
+ // the prologue. Reset MBBI back to the last PUSH of the prologue.
+ MBBI = OldMBBI;
+ break;
+ }
+ }
+
+ // Emit call frame information for the callee-saved high registers.
+ for (auto &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12: {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
if (NumBytes) {
// Insert it after all the callee-save spills.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
MachineInstr::FrameSetup);
if (!HasFP) {
CFAOffset -= NumBytes;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -276,8 +319,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
if (STI.isTargetELF() && HasFP)
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
+ MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -299,7 +342,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
@@ -308,12 +351,12 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
return true;
else if (MI.getOpcode() == ARM::tPOP) {
- // The first two operands are predicates. The last two are
- // imp-def and imp-use of SP. Check everything in between.
- for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i)
- if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
- return false;
return true;
+ } else if (MI.getOpcode() == ARM::tMOVr) {
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
+ ARM::hGPRRegClass.contains(Dst));
}
return false;
}
@@ -322,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
@@ -330,7 +373,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- int NumBytes = (int)MFI->getStackSize();
+ int NumBytes = (int)MFI.getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -361,7 +404,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
@@ -405,7 +448,7 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
return true;
// LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
- for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
if (CSI.getReg() == ARM::LR)
return true;
@@ -568,6 +611,19 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
return true;
}
+// Return the first iterator at or after CurrentReg which is present in
+// EnabledRegs, or OrderEnd if no further registers are in that set. This
+// does not advance the iterator first, so returns CurrentReg itself if it
+// is in EnabledRegs.
+template <unsigned SetSize>
+static const unsigned *
+findNextOrderedReg(const unsigned *CurrentReg,
+ SmallSet<unsigned, SetSize> &EnabledRegs,
+ const unsigned *OrderEnd) {
+ while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
+ ++CurrentReg;
+ return CurrentReg;
+}
+
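[Editorial aside: a self-contained sketch of how the helper above is driven, with std::set standing in for SmallSet and plain integers for register ids.]

    #include <iterator>
    #include <set>
    // Same shape as findNextOrderedReg above, minus the LLVM types.
    static const unsigned *findNext(const unsigned *Cur,
                                    const std::set<unsigned> &Enabled,
                                    const unsigned *End) {
      while (Cur != End && !Enabled.count(*Cur))
        ++Cur;
      return Cur;
    }
    // Walk a fixed order array, visiting only the enabled entries.
    static void visitEnabled(const std::set<unsigned> &Enabled) {
      static const unsigned Order[] = {8, 9, 10, 11}; // stand-ins for r8-r11
      for (const unsigned *I = findNext(std::begin(Order), Enabled,
                                        std::end(Order));
           I != std::end(Order); I = findNext(I + 1, Enabled, std::end(Order))) {
        // ... use *I ...
      }
    }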
bool Thumb1FrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -578,29 +634,114 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+
+ SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr
+ SmallSet<unsigned, 4> HiRegsToSave; // r8-r11
+ SmallSet<unsigned, 9> CopyRegs; // Registers which can be used after pushing
+ // LoRegs for saving HiRegs.
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- bool isKill = true;
- // Add the callee-saved register as live-in unless it's LR and
- // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
- // then it's already added to the function and entry block live-in sets.
- if (Reg == ARM::LR) {
- MachineFunction &MF = *MBB.getParent();
- if (MF.getFrameInfo()->isReturnAddressTaken() &&
- MF.getRegInfo().isLiveIn(Reg))
- isKill = false;
+ if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+ LoRegsToSave.insert(Reg);
+ } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+ HiRegsToSave.insert(Reg);
+ } else {
+ llvm_unreachable("callee-saved register of unexpected class");
+ }
+
+ if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+ !MF.getRegInfo().isLiveIn(Reg) &&
+ !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+ CopyRegs.insert(Reg);
+ }
+
+ // Unused argument registers can be used for the high register saving.
+ for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ if (!MF.getRegInfo().isLiveIn(ArgReg))
+ CopyRegs.insert(ArgReg);
+
+ // Push the low registers and lr
+ if (!LoRegsToSave.empty()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
+ if (LoRegsToSave.count(Reg)) {
+ bool isKill = !MF.getRegInfo().isLiveIn(Reg);
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // Push the high registers. There are no store instructions that can access
+ // these registers directly, so we have to move them to low registers, and
+ // push them. This might take multiple pushes, as it is possible for there to
+ // be fewer low registers available than high registers which need saving.
+
+ // These are in reverse order so that in the case where we need to use
+ // multiple PUSH instructions, the order of the registers on the stack still
+ // matches the unwind info. They need to be switched back to ascending order
+ // before adding to the PUSH instruction.
+ static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
+ ARM::R5, ARM::R4, ARM::R3,
+ ARM::R2, ARM::R1, ARM::R0};
+ static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
+
+ const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+ const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+ // Find the first register to save.
+ const unsigned *HiRegToSave = findNextOrderedReg(
+ std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
+
+ while (HiRegToSave != AllHighRegsEnd) {
+ // Find the first low register to use.
+ const unsigned *CopyReg =
+ findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+ // Create the PUSH, but don't insert it yet (the MOVs need to come first).
+ MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(PushMIB);
+
+ SmallVector<unsigned, 4> RegsToPush;
+ while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ if (HiRegsToSave.count(*HiRegToSave)) {
+ bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
+ if (isKill)
+ MBB.addLiveIn(*HiRegToSave);
+
+ // Emit a MOV from the high reg to the low reg.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+ MIB.addReg(*CopyReg, RegState::Define);
+ MIB.addReg(*HiRegToSave, getKillRegState(isKill));
+ AddDefaultPred(MIB);
+
+ // Record the register that must be added to the PUSH.
+ RegsToPush.push_back(*CopyReg);
+
+ CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+ HiRegToSave =
+ findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+ }
}
- if (isKill)
- MBB.addLiveIn(Reg);
+ // Add the low registers to the PUSH, in ascending order.
+ for (unsigned Reg : reverse(RegsToPush))
+ PushMIB.addReg(Reg, RegState::Kill);
- MIB.addReg(Reg, getKillRegState(isKill));
+ // Insert the PUSH instruction after the MOVs.
+ MBB.insert(MI, PushMIB);
}
- MIB.setMIFlags(MachineInstr::FrameSetup);
+
return true;
}
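[Editorial aside: the net effect of the high-register path above, sketched without MachineIR. Greedily pair each high register with a free low register, emitting one PUSH per batch when the low registers run out; all names are illustrative.]

    #include <utility>
    #include <vector>
    // Each inner vector is one PUSH batch of (low copy reg <- high reg) moves.
    static std::vector<std::vector<std::pair<unsigned, unsigned>>>
    batchHighRegSaves(const std::vector<unsigned> &HiRegs,
                      const std::vector<unsigned> &CopyRegs) {
      std::vector<std::vector<std::pair<unsigned, unsigned>>> Batches;
      for (size_t H = 0; H < HiRegs.size();) {
        std::vector<std::pair<unsigned, unsigned>> Batch;
        for (size_t C = 0; C < CopyRegs.size() && H < HiRegs.size(); ++C, ++H)
          Batch.push_back({CopyRegs[C], HiRegs[H]}); // tMOVr lo <- hi
        Batches.push_back(Batch);                    // one tPUSH per batch
      }
      return Batches;
    }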
@@ -615,15 +756,101 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+
+ SmallSet<unsigned, 9> LoRegsToRestore;
+ SmallSet<unsigned, 4> HiRegsToRestore;
+ // Low registers (r0-r7) which can be used to restore the high registers.
+ SmallSet<unsigned, 9> CopyRegs;
+
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+
+ if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+ LoRegsToRestore.insert(Reg);
+ } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+ HiRegsToRestore.insert(Reg);
+ } else {
+ llvm_unreachable("callee-saved register of unexpected class");
+ }
+
+ // If this is a low register not used as the frame pointer, we may want to
+ // use it for restoring the high registers.
+ if ((ARM::tGPRRegClass.contains(Reg)) &&
+ !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+ CopyRegs.insert(Reg);
+ }
+
+ // If this is a return block, we may be able to use some unused return value
+ // registers for restoring the high regs.
+ auto Terminator = MBB.getFirstTerminator();
+ if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+ CopyRegs.insert(ARM::R0);
+ CopyRegs.insert(ARM::R1);
+ CopyRegs.insert(ARM::R2);
+ CopyRegs.insert(ARM::R3);
+ for (auto Op : Terminator->implicit_operands()) {
+ if (Op.isReg())
+ CopyRegs.erase(Op.getReg());
+ }
+ }
+
+ static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7};
+ static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
+
+ const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+ const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+ // Find the first register to restore.
+ auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
+ HiRegsToRestore, AllHighRegsEnd);
+
+ while (HiRegToRestore != AllHighRegsEnd) {
+ assert(!CopyRegs.empty());
+ // Find the first low register to use.
+ auto CopyReg =
+ findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+ // Create the POP instruction.
+ MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(PopMIB);
+
+ while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ // Add the low register to the POP.
+ PopMIB.addReg(*CopyReg, RegState::Define);
+
+ // Create the MOV from low to high register.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+ MIB.addReg(*HiRegToRestore, RegState::Define);
+ MIB.addReg(*CopyReg, RegState::Kill);
+ AddDefaultPred(MIB);
+
+ CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+ HiRegToRestore =
+ findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+ }
+ }
+
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+
+ // High registers (excluding lr) have already been dealt with
+ if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
+ continue;
+
if (Reg == ARM::LR) {
if (MBB.succ_empty()) {
// Special epilogue for vararg functions. See emitEpilogue
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 159731d8fc72..4b4fbaab28d9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -83,7 +83,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -109,7 +109,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 0c7055551632..d01fc8c40ddf 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -38,10 +38,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Thumb IT blocks insertion pass";
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index e2e6dafd218a..1c731d669eda 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -130,7 +130,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -170,7 +170,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c4fdb9b3147d..8208e7e24770 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -148,10 +148,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Thumb2 instruction size reduction pass";
}
@@ -430,6 +430,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (!MBB.getParent()->getFunction()->optForMinSize())
return false;
+ if (!MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 4)
+ return false;
+
// We're creating a completely different type of load/store - LDM from LDR.
// For this reason we can't reuse the logic at the end of this function; we
// have to implement the MI building here.
@@ -651,7 +655,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2ADDSri: {
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
- // fallthrough
+ LLVM_FALLTHROUGH;
}
case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 6c26c8843865..2efd63b84a2c 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -126,6 +126,7 @@ static void emitThumbRegPlusImmInReg(
bool CanChangeCC, const TargetInstrInfo &TII,
const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false;
@@ -154,6 +155,9 @@ static void emitThumbRegPlusImmInReg(
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
+ } else if (ST.genExecuteOnly()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+ .addImm(NumBytes).setMIFlags(MIFlags);
} else
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
MIFlags);
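[Editorial aside: for execute-only code the constant-pool load is replaced by t2MOVi32imm, which is later expanded to a MOVW/MOVT pair; the split of the immediate itself is trivial. A sketch under that assumption.]

    #include <cstdint>
    #include <utility>
    // Sketch: a 32-bit immediate as the (movw, movt) halves used by the
    // MOVW/MOVT expansion of t2MOVi32imm.
    static std::pair<uint16_t, uint16_t> splitImm32(uint32_t Imm) {
      return {static_cast<uint16_t>(Imm & 0xffff),  // MOVW: low half
              static_cast<uint16_t>(Imm >> 16)};    // MOVT: high half
    }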
@@ -511,10 +515,10 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameReg = ARM::SP;
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+ MF.getFrameInfo().getStackSize() + SPAdj;
- if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ if (MF.getFrameInfo().hasVarSizedObjects()) {
assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
@@ -534,7 +538,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
- assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ assert(!MF.getFrameInfo().hasVarSizedObjects() &&
"Cannot use SP to access the emergency spill slot in "
"functions with variable sized frame objects");
}
@@ -570,7 +574,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
if (Opcode == ARM::tLDRspi) {
- if (FrameReg == ARM::SP)
+ if (FrameReg == ARM::SP || STI.genExecuteOnly())
emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
Offset, false, TII, *this);
else {
@@ -594,7 +598,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool UseRR = false;
if (Opcode == ARM::tSTRspi) {
- if (FrameReg == ARM::SP)
+ if (FrameReg == ARM::SP || STI.genExecuteOnly())
emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
Offset, false, TII, *this);
else {