author    Dimitry Andric <dim@FreeBSD.org>  2022-07-03 14:10:23 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2022-07-03 14:10:23 +0000
commit    145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree      1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/ARM
parent    ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
download  src-145449b1e420787bb99721a429341fa6be3adfb6.tar.gz
          src-145449b1e420787bb99721a429341fa6be3adfb6.zip

Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877
(ref: vendor/llvm-project/llvmorg-15-init-15358-g53dc0f107877)
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--  llvm/lib/Target/ARM/A15SDOptimizer.cpp | 3
-rw-r--r--  llvm/lib/Target/ARM/ARM.h | 2
-rw-r--r--  llvm/lib/Target/ARM/ARM.td | 163
-rw-r--r--  llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 143
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 227
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 29
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 26
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 39
-rw-r--r--  llvm/lib/Target/ARM/ARMBlockPlacement.cpp | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMCallingConv.td | 21
-rw-r--r--  llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 17
-rw-r--r--  llvm/lib/Target/ARM/ARMFastISel.cpp | 45
-rw-r--r--  llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp | 432
-rw-r--r--  llvm/lib/Target/ARM/ARMFrameLowering.cpp | 846
-rw-r--r--  llvm/lib/Target/ARM/ARMFrameLowering.h | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 35
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 413
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h | 12
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrFormats.td | 26
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 27
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td | 89
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrNEON.td | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb2.td | 7
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrVFP.td | 96
-rw-r--r--  llvm/lib/Target/ARM/ARMInstructionSelector.cpp | 16
-rw-r--r--  llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12
-rw-r--r--  llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 8
-rw-r--r--  llvm/lib/Target/ARM/ARMParallelDSP.cpp | 5
-rw-r--r--  llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/ARM/ARMRegisterBankInfo.h | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMSLSHardening.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 9
-rw-r--r--  llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.cpp | 43
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h | 476
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetMachine.cpp | 28
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetMachine.h | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 50
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3
-rw-r--r--  llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 317
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 1287
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 132
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 12
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 6
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 29
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 227
-rw-r--r--  llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 57
-rw-r--r--  llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp | 7
-rw-r--r--  llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp | 14
-rw-r--r--  llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 3
-rw-r--r--  llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 758
-rw-r--r--  llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 6
-rw-r--r--  llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 6
-rw-r--r--  llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 32
-rw-r--r--  llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 50
64 files changed, 4204 insertions, 2138 deletions
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index d0efecad63bc..65da95b0fc8d 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -361,9 +361,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
MI = Front.pop_back_val();
// If we have already explored this MachineInstr, ignore it.
- if (Reached.find(MI) != Reached.end())
+ if (!Reached.insert(MI).second)
continue;
- Reached.insert(MI);
if (MI->isPHI()) {
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
Register Reg = MI->getOperand(I).getReg();
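The change above swaps a find()-then-insert() pair for the single-call idiom: a set's insert() returns an (iterator, inserted) pair, so one probe both tests membership and records the element. A minimal standalone sketch of the idiom (hypothetical names, not code from the patch):

    #include <set>

    std::set<const void *> Reached;

    // Returns true on the first visit, false if Ptr was already explored.
    bool markVisited(const void *Ptr) {
      // insert() returns {iterator, bool}; the bool is false when the
      // element was already present, so no separate find() is needed.
      return Reached.insert(Ptr).second;
    }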
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 979371bf7cf6..9990078cfdbb 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@ Pass *createMVEGatherScatterLoweringPass();
FunctionPass *createARMSLSHardeningPass();
FunctionPass *createARMIndirectThunks();
Pass *createMVELaneInterleavingPass();
+FunctionPass *createARMFixCortexA57AES1742098Pass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -77,6 +78,7 @@ void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
void initializeARMSLSHardeningPass(PassRegistry &);
void initializeMVELaneInterleavingPass(PassRegistry &);
+void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
} // end namespace llvm
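The two declarations added here follow the standard LLVM machine-pass pattern: a factory function plus a PassRegistry initializer. A hedged sketch of the matching boilerplate one would expect inside the new ARMFixCortexA57AES1742098Pass.cpp (the class shape, pass-name string, and description below are assumptions inferred from the declarations, not quoted from the patch):

    namespace {
    // Sketch only: conventional MachineFunctionPass skeleton.
    class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
    public:
      static char ID;
      ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {}
      bool runOnMachineFunction(MachineFunction &MF) override;
    };
    } // end anonymous namespace

    char ARMFixCortexA57AES1742098::ID = 0;

    INITIALIZE_PASS(ARMFixCortexA57AES1742098, "arm-fix-cortex-a57-aes-1742098",
                    "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)

    FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
      return new ARMFixCortexA57AES1742098();
    }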
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 27edf69b4abf..48559a89a30a 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -19,9 +19,11 @@ include "llvm/Target/Target.td"
// ARM Subtarget state.
//
-def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode",
+// True if compiling for Thumb, false for ARM.
+def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb",
"true", "Thumb mode">;
+// True if we're using software floating point features.
def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
"true", "Use software floating "
"point features.">;
@@ -48,14 +50,18 @@ def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
"Enable 64-bit FP registers",
[FeatureFPRegs]>;
+// True if the floating point unit supports double precision.
def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
"Floating point unit supports "
"double precision",
[FeatureFPRegs64]>;
+// True if subtarget has the full 32 double precision FP registers for VFPv3.
def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
"Extend FP to 32 double registers">;
+/// Versions of the VFP flags restricted to single precision, or to
+/// 16 d-registers, or both.
multiclass VFPver<string name, string query, string description,
list<SubtargetFeature> prev,
list<SubtargetFeature> otherimplies,
@@ -100,6 +106,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
+// True if subtarget supports half-precision FP conversions.
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision "
"floating point">;
@@ -110,169 +117,211 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
[FeatureVFP4], []>;
+// True if subtarget supports half-precision FP operations.
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Enable full half-precision "
"floating point",
[FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
+// True if subtarget supports half-precision FP fml operations.
def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
"Enable full half-precision "
"floating point fml instructions",
[FeatureFullFP16]>;
+// True if subtarget supports [su]div in Thumb mode.
def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
- "HasHardwareDivideInThumb", "true",
+ "HasDivideInThumbMode", "true",
"Enable divide instructions in Thumb">;
+// True if subtarget supports [su]div in ARM mode.
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
- "HasHardwareDivideInARM", "true",
+ "HasDivideInARMMode", "true",
"Enable divide instructions in ARM mode">;
// Atomic Support
+
+// True if the subtarget supports DMB / DSB data barrier instructions.
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb/dsb) instructions">;
+// True if the subtarget supports CLREX instructions.
def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
"Has v7 clrex instruction">;
+// True if the subtarget supports DFB data barrier instruction.
def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true",
"Has full data barrier (dfb) instruction">;
+// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions.
def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
"HasAcquireRelease", "true",
"Has v8 acquire/release (lda/ldaex "
" etc) instructions">;
-def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+// True if floating point compare + branch is slow.
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true",
"FP compare + branch is slow">;
+// True if the processor supports the Performance Monitor Extensions. These
+// include a generic cycle-counter as well as more fine-grained (often
+// implementation-specific) events.
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance "
"Monitor extensions">;
// TrustZone Security Extensions
+
+// True if processor supports TrustZone security extensions.
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone "
"security extensions">;
+// True if processor supports ARMv8-M Security Extensions.
def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
"Enable support for ARMv8-M "
"Security Extensions">;
+// True if processor supports SHA1 and SHA256.
def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true",
"Enable SHA1 and SHA256 support", [FeatureNEON]>;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES support", [FeatureNEON]>;
+// True if processor supports Cryptography extensions.
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable support for "
"Cryptography extensions",
[FeatureNEON, FeatureSHA2, FeatureAES]>;
+// True if processor supports CRC instructions.
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable support for CRC instructions">;
+// True if the ARMv8.2A dot product instructions are supported.
def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true",
"Enable support for dot product instructions",
[FeatureNEON]>;
-// Not to be confused with FeatureHasRetAddrStack (return address stack)
+// True if the processor supports RAS extensions.
+// Not to be confused with FeatureHasRetAddrStack (return address stack).
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability "
"and Serviceability extensions">;
-// Fast computation of non-negative address offsets
+// Fast computation of non-negative address offsets.
+// True if processor does positive address offset computation faster.
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
"Enable fast computation of "
"positive address offsets">;
-// Fast execution of AES crypto operations
+// Fast execution of AES crypto operations.
+// True if processor executes back to back AES instruction pairs faster.
def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
-// Fast execution of bottom and top halves of literal generation
+// Fast execution of bottom and top halves of literal generation.
+// True if processor executes back to back bottom and top halves of literal generation faster.
def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
-// The way of reading thread pointer
-def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
+// The way of reading thread pointer.
+// True if the thread pointer is read from a coprocessor register.
+def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true",
"Reading thread pointer from register">;
// Cyclone can zero VFP registers in 0 cycles.
+// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+// particularly effective at zeroing a VFP register.
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
-// Whether it is profitable to unpredicate certain instructions during if-conversion
+// Whether it is profitable to unpredicate certain instructions during if-conversion.
+// True if if-conversion may decide to leave some instructions unpredicated.
def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
"IsProfitableToUnpredicate", "true",
"Is profitable to unpredicate">;
// Some targets (e.g. Swift) have microcoded VGETLNi32.
+// True if VMOV will be favored over VGETLNi32.
def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
"HasSlowVGETLNi32", "true",
"Has slow VGETLNi32 - prefer VMOV">;
// Some targets (e.g. Swift) have microcoded VDUP32.
+// True if VMOV will be favored over VDUP.
def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32",
"true",
"Has slow VDUP32 - prefer VMOV">;
// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
// for scalar FP, as this allows more effective execution domain optimization.
+// True if VMOVSR will be favored over VMOVDRR.
def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
"true", "Prefer VMOVSR">;
// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
-// than ISH
-def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
+// than ISH.
+// True if ISHST barriers will be used for Release semantics.
+def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers",
"true", "Prefer ISHST barriers">;
// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+// True if the AGU and NEON/FPU units are multiplexed.
def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits",
"true",
"Has muxed AGU and NEON/FPU">;
// Whether VLDM/VSTM starting with odd register number need more microops
-// than single VLDRS
-def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+// than single VLDRS.
+// True if a VLDM/VSTM starting with an odd register number is considered to
+// take more microops than single VLDRS/VSTRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister",
"true", "VLDM/VSTM starting "
"with an odd register is slow">;
// Some targets have a renaming dependency when loading into D subregisters.
+// True if loading into a D subregister will be penalized.
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
- "SlowLoadDSubregister", "true",
+ "HasSlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+// True if a wider stride is used when allocating VFP registers.
def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
"UseWideStrideVFP", "true",
"Use a wide stride when allocating VFP registers">;
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+// True if VMOVS will never be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;
// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
// VFP register widths.
+// True if registers are splatted between VFP and NEON instructions.
def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
- "SplatVFPToNeon", "true",
+ "UseSplatVFPToNeon", "true",
"Splat register from VFP to NEON",
[FeatureDontWidenVMOVS]>;
// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
+// True if the MLx expansion pass should be run.
def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
"ExpandMLx", "true",
"Expand VFP/NEON MLA/MLS instructions">;
// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
+// True if VFP/NEON VMLA/VMLS have special RAW hazards.
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
"true", "Has VMLx hazards">;
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
+// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
"UseNEONForFPMovs", "true",
"Convert VMOVSR, VMOVRS, "
@@ -281,18 +330,21 @@ def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations. This affects instruction selection and should
// only be enabled if the handling of denormals is not important.
+// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used.
def FeatureNEONForFP : SubtargetFeature<"neonfp",
- "UseNEONForSinglePrecisionFP",
+ "HasNEONForFP",
"true",
"Use NEON for single precision FP">;
// On some processors, VLDn instructions that access unaligned data take one
// extra cycle. Take that into account when computing operand latencies.
-def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+// True if VLDn instructions take an extra cycle for unaligned accesses.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment",
"true",
"Check for VLDn unaligned access">;
// Some processors have a nonpipelined VFP coprocessor.
+// True if VFP instructions are not pipelined.
def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
"NonpipelinedVFP", "true",
"VFP instructions are not pipelined">;
@@ -300,20 +352,27 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
+// If the VFP2 / NEON instructions are available, indicates
+// whether the FP VML[AS] instructions are slow (if so, don't use them).
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
"Disable VFP / NEON MAC instructions">;
-// VFPv4 added VFMA instructions that can similar be fast or slow.
+// VFPv4 added VFMA instructions that can similarly be fast or slow.
+// If the VFP4 / NEON instructions are available, indicates
+// whether the FP VFM[AS] instructions are slow (if so, don't use them).
def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true",
"Disable VFP / NEON FMA instructions">;
// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+/// True if NEON has special multiplier accumulator
+/// forwarding to allow mul + mla being issued back to back.
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
"HasVMLxForwarding", "true",
"Has multiplier accumulator forwarding">;
// Disable 32-bit to 16-bit narrowing for experimentation.
-def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
@@ -332,17 +391,22 @@ def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFac
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
/// processors.
+/// True if codegen would avoid using instructions
+/// that partially update CPSR and add false dependency on the previous
+/// CPSR setting instruction.
def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
/// Disable +1 predication cost for instructions updating CPSR.
/// Enabled for Cortex-A57.
+/// True if the +1 predication cost for CPSR-updating instructions is disabled. Enabled for Cortex-A57.
def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
"CheapPredicableCPSRDef",
"true",
"Disable +1 predication cost for instructions updating CPSR">;
+// True if codegen should avoid flag-setting movs with a shifter operand (i.e. asr, lsl, lsr).
def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
"AvoidMOVsShifterOperand", "true",
"Avoid movs instructions with "
@@ -357,16 +421,20 @@ def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
// Some processors have no branch predictor, which changes the expected cost of
// taking a branch which affects the choice of whether to use predicated
// instructions.
+// True if the subtarget has a branch predictor. Having
+// a branch predictor or not changes the expected cost of taking a branch
+// which affects the choice of whether to use predicated instructions.
def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
"HasBranchPredictor", "false",
"Has no branch predictor">;
/// DSP extension.
+/// True if the subtarget supports the DSP (saturating arith and such) instructions.
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
"Supports DSP instructions in "
"ARM and/or Thumb2">;
-// Multiprocessing extension.
+// True if the subtarget supports Multiprocessing extension (ARMv7 only).
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
@@ -378,31 +446,42 @@ def FeatureVirtualization : SubtargetFeature<"virtualization",
// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
// See ARMInstrInfo.td for details.
+// True if NaCl TRAP instruction is generated instead of the regular TRAP.
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+// True if the subtarget disallows unaligned memory
+// accesses for some types. For details, see
+// ARMTargetLowering::allowsMisalignedMemoryAccesses().
def FeatureStrictAlign : SubtargetFeature<"strict-align",
"StrictAlign", "true",
"Disallow all unaligned memory "
"access">;
+// Generate calls via indirect call instructions.
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
+// Generate code that does not contain data access to code sections.
def FeatureExecuteOnly : SubtargetFeature<"execute-only",
"GenExecuteOnly", "true",
"Enable the generation of "
"execute only code.">;
+// True if R9 is not available as a general purpose register.
def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
"Reserve R9, making it unavailable"
" as GPR">;
+// True if MOVT / MOVW pairs are not used for materialization of
+// 32-bit imms (including global addresses).
def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
"Don't use movt/movw pairs for "
"32-bit imms">;
+/// Implicitly convert an instruction to a different one if its immediates
+/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
def FeatureNoNegativeImmediates
: SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
@@ -415,28 +494,39 @@ def FeatureNoNegativeImmediates
def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true",
"Use the MachineScheduler">;
+// Use the MachinePipeliner for instruction scheduling for the subtarget.
+def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true",
+ "Use the MachinePipeliner">;
+
+// False if scheduling should happen again after register allocation.
def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
"DisablePostRAScheduler", "true",
"Don't schedule again after register allocation">;
// Armv8.5-A extensions
+// Has speculation barrier.
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
// Armv8.6-A extensions
+
+// True if subtarget supports BFloat16 floating point operations.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
"Enable support for BFloat16 instructions", [FeatureNEON]>;
+// True if subtarget supports 8-bit integer matrix multiply.
def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
"true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;
// Armv8.1-M extensions
+// True if the processor supports the Low Overhead Branch extension.
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
"Enable Low Overhead Branch "
"extensions">;
+// Mitigate against the CVE-2021-35465 security vulnerability.
def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
"FixCMSE_CVE_2021_35465", "true",
"Mitigate against the cve-2021-35465 "
@@ -446,11 +536,26 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
"Enable Pointer Authentication and Branch "
"Target Identification">;
+/// Don't place a BTI instruction after return-twice constructs (setjmp).
def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
"NoBTIAtReturnTwice", "true",
"Don't place a BTI instruction "
"after a return-twice">;
+def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098",
+ "FixCortexA57AES1742098", "true",
+ "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;
+
+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+ "CreateAAPCSFrameChain", "true",
+ "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+ "CreateAAPCSFrameChainLeaf", "true",
+ "Create an AAPCS compliant frame chain "
+ "for leaf functions",
+ [FeatureAAPCSFrameChain]>;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -467,16 +572,18 @@ def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
"Is microcontroller profile ('M' series)">;
-
+// True if Thumb2 instructions are supported.
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
+// True if subtarget does not support ARM mode execution.
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
//===----------------------------------------------------------------------===//
// ARM ISAs.
//
+// Specify whether the target supports specific ARM ISA variants.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -599,13 +706,16 @@ foreach i = {0-7} in
// Control codegen mitigation against Straight Line Speculation vulnerability.
//===----------------------------------------------------------------------===//
+/// Harden against Straight Line Speculation for Returns and Indirect Branches.
def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
"HardenSlsRetBr", "true",
"Harden against straight line speculation across RETurn and BranchRegister "
"instructions">;
+/// Harden against Straight Line Speculation for indirect calls.
def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
"HardenSlsBlr", "true",
"Harden against straight line speculation across indirect calls">;
+/// Generate thunk code for SLS mitigation in the normal text section.
def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat",
"HardenSlsNoComdat", "true",
"Generate thunk code for SLS mitigation in the normal text section">;
@@ -1303,6 +1413,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
ProcM7,
FeatureFPARMv8_D16,
+ FeatureUseMIPipeliner,
FeatureUseMISched]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
@@ -1370,13 +1481,15 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureCRC,
FeatureFPAO,
FeatureAvoidPartialCPSR,
- FeatureCheapPredicableCPSR]>;
+ FeatureCheapPredicableCPSR,
+ FeatureFixCortexA57AES1742098]>;
def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFixCortexA57AES1742098]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureHWDivThumb,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index fa09b2567aa9..4aa28bc5d28d 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -161,10 +161,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer->EmitCOFFSymbolStorageClass(Scl);
- OutStreamer->EmitCOFFSymbolType(Type);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->emitCOFFSymbolStorageClass(Scl);
+ OutStreamer->emitCOFFSymbolType(Type);
+ OutStreamer->endCOFFSymbolDef();
}
// Emit the rest of the function body.
@@ -535,27 +535,27 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->switchSection(TLOFMacho.getNonLazySymbolPointerSection());
emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
Stubs = MMIMacho.GetThreadLocalGVStubList();
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
+ OutStreamer->switchSection(TLOFMacho.getThreadLocalPointerSection());
emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -740,55 +740,53 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (MMI) {
- if (const Module *SourceModule = MMI->getModule()) {
- // ABI_PCS_wchar_t to indicate wchar_t width
- // FIXME: There is no way to emit value 0 (wchar_t prohibited).
- if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("wchar_size"))) {
- int WCharWidth = WCharWidthValue->getZExtValue();
- assert((WCharWidth == 2 || WCharWidth == 4) &&
- "wchar_t width must be 2 or 4 bytes");
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
- }
+ if (const Module *SourceModule = MMI->getModule()) {
+ // ABI_PCS_wchar_t to indicate wchar_t width
+ // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+ if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("wchar_size"))) {
+ int WCharWidth = WCharWidthValue->getZExtValue();
+ assert((WCharWidth == 2 || WCharWidth == 4) &&
+ "wchar_t width must be 2 or 4 bytes");
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+ }
- // ABI_enum_size to indicate enum width
- // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
- // (all enums contain a value needing 32 bits to encode).
- if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("min_enum_size"))) {
- int EnumWidth = EnumWidthValue->getZExtValue();
- assert((EnumWidth == 1 || EnumWidth == 4) &&
- "Minimum enum width must be 1 or 4 bytes");
- int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
- ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
- }
+ // ABI_enum_size to indicate enum width
+ // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+ // (all enums contain a value needing 32 bits to encode).
+ if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("min_enum_size"))) {
+ int EnumWidth = EnumWidthValue->getZExtValue();
+ assert((EnumWidth == 1 || EnumWidth == 4) &&
+ "Minimum enum width must be 1 or 4 bytes");
+ int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+ ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+ }
- auto *PACValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("sign-return-address"));
- if (PACValue && PACValue->getZExtValue() == 1) {
- // If "+pacbti" is used as an architecture extension,
- // Tag_PAC_extension is emitted in
- // ARMTargetStreamer::emitTargetAttributes().
- if (!STI.hasPACBTI()) {
- ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
- ARMBuildAttrs::AllowPACInNOPSpace);
- }
- ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ auto *PACValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("sign-return-address"));
+ if (PACValue && PACValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_PAC_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
+ ARMBuildAttrs::AllowPACInNOPSpace);
}
+ ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ }
- auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("branch-target-enforcement"));
- if (BTIValue && BTIValue->getZExtValue() == 1) {
- // If "+pacbti" is used as an architecture extension,
- // Tag_BTI_extension is emitted in
- // ARMTargetStreamer::emitTargetAttributes().
- if (!STI.hasPACBTI()) {
- ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
- ARMBuildAttrs::AllowBTIInNOPSpace);
- }
- ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
+ auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("branch-target-enforcement"));
+ if (BTIValue && BTIValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_BTI_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
+ ARMBuildAttrs::AllowBTIInNOPSpace);
}
+ ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
}
}
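The de-indented block reads ordinary IR module flags ("wchar_size", "min_enum_size", "sign-return-address", "branch-target-enforcement"). For orientation, a hedged sketch of how a frontend records the flags this emitter inspects (illustrative values; Module::addModuleFlag is the standard API, but the exact behaviors frontends use may differ):

    // Frontend side (sketch): record ABI facts as module flags.
    LLVMContext Ctx;
    Module M("example", Ctx);
    M.addModuleFlag(Module::Error, "wchar_size", 4);
    M.addModuleFlag(Module::Error, "min_enum_size", 4);
    // PAC/BTI builds would additionally set flags such as
    //   "sign-return-address" and "branch-target-enforcement".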
@@ -2276,6 +2274,47 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInstSB);
return;
}
+
+ case ARM::SEH_StackAlloc:
+ ATS.emitARMWinCFIAllocStack(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveRegs:
+ case ARM::SEH_SaveRegs_Ret:
+ ATS.emitARMWinCFISaveRegMask(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveSP:
+ ATS.emitARMWinCFISaveSP(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_SaveFRegs:
+ ATS.emitARMWinCFISaveFRegs(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveLR:
+ ATS.emitARMWinCFISaveLR(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_Nop:
+ case ARM::SEH_Nop_Ret:
+ ATS.emitARMWinCFINop(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_PrologEnd:
+ ATS.emitARMWinCFIPrologEnd(/*Fragment=*/false);
+ return;
+
+ case ARM::SEH_EpilogStart:
+ ATS.emitARMWinCFIEpilogStart(ARMCC::AL);
+ return;
+
+ case ARM::SEH_EpilogEnd:
+ ATS.emitARMWinCFIEpilogEnd();
+ return;
}
MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5b0bae4d9274..80ba7b5f0d2e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
}
// Branch analysis.
+// Cond vector output format:
+// 0 elements indicates an unconditional branch
+// 2 elements indicates a conditional branch; the elements are
+// the condition to check and the CPSR.
+// 3 elements indicates a hardware loop end; the elements
+// are the opcode, the operand value to test, and a dummy
+// operand used to pad out to 3 operands.
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
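The new comment defines a small protocol for the Cond vector. A consumer-side sketch making the three shapes explicit (hypothetical helper, not part of the patch):

    // Interpreting Cond as filled in by ARMBaseInstrInfo::analyzeBranch.
    void describeBranch(ArrayRef<MachineOperand> Cond) {
      if (Cond.empty()) {
        // Unconditional branch: only TBB is meaningful.
      } else if (Cond.size() == 2) {
        // Conditional branch: Cond[0] holds the ARMCC condition code as an
        // immediate, Cond[1] is the CPSR register operand.
      } else { // Cond.size() == 3
        // Hardware loop end: Cond[0] is the opcode (e.g. ARM::t2LoopEnd),
        // Cond[1] the operand being tested, Cond[2] a dummy pad operand.
      }
    }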
@@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
CantAnalyze = true;
+ } else if (I->getOpcode() == ARM::t2LoopEnd &&
+ MBB.getParent()
+ ->getSubtarget<ARMSubtarget>()
+ .enableMachinePipeliner()) {
+ if (!Cond.empty())
+ return true;
+ FBB = TBB;
+ TBB = I->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(MachineOperand::CreateImm(0));
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
@@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
return 0;
if (!isUncondBranchOpcode(I->getOpcode()) &&
- !isCondBranchOpcode(I->getOpcode()))
+ !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
return 0;
// Remove the branch.
@@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I == MBB.begin()) return 1;
--I;
- if (!isCondBranchOpcode(I->getOpcode()))
+ if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
return 1;
// Remove the branch.
@@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "ARM branch conditions have two components!");
+ assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
+ "ARM branch conditions have two or three components!");
// For conditional branches, we use addOperand to preserve CPSR flags.
@@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else
+ } else if (Cond.size() == 2) {
BuildMI(&MBB, DL, get(BccOpc))
.addMBB(TBB)
.addImm(Cond[0].getImm())
.add(Cond[1]);
+ } else
+ BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, DL, get(BccOpc))
- .addMBB(TBB)
- .addImm(Cond[0].getImm())
- .add(Cond[1]);
+ if (Cond.size() == 2)
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
+ else if (Cond.size() == 3)
+ BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
@@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
- Cond[0].setImm(ARMCC::getOppositeCondition(CC));
- return false;
+ if (Cond.size() == 2) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+ }
+ return true;
}
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
@@ -556,7 +582,7 @@ std::string ARMBaseInstrInfo::createMIROperandComment(
return GenericComment;
// If not, check if we have an immediate operand.
- if (Op.getType() != MachineOperand::MO_Immediate)
+ if (!Op.isImm())
return std::string();
// And print its corresponding condition code if the immediate is a
@@ -1703,7 +1729,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// or some other super-register.
int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
if (ImpDefIdx != -1)
- MI.RemoveOperand(ImpDefIdx);
+ MI.removeOperand(ImpDefIdx);
// Change the opcode and operands.
MI.setDesc(get(ARM::VMOVD));
@@ -2045,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
return true;
+ if (isSEHInstruction(MI))
+ return true;
+
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2598,7 +2627,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// ahead: strip all existing registers off and add them back again
// in the right order.
for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
- MI->RemoveOperand(i);
+ MI->removeOperand(i);
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
@@ -2626,7 +2655,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Turn it into a move.
MI.setDesc(TII.get(ARM::MOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.RemoveOperand(FrameRegIdx+1);
+ MI.removeOperand(FrameRegIdx+1);
Offset = 0;
return true;
} else if (Offset < 0) {
@@ -5103,7 +5132,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI.setDesc(get(ARM::VORRd));
@@ -5122,7 +5151,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
@@ -5155,7 +5184,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
// Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
// Again DDst may be undefined at the beginning of this instruction.
@@ -5190,7 +5219,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
if (DSrc == DDst) {
// Destination can be:
@@ -5766,26 +5795,25 @@ struct OutlinerCosts {
SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};
-unsigned
-ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
- assert(C.LRUWasSet && "LRU wasn't set?");
+Register
+ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
MachineFunction *MF = C.getMF();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI =
+ static_cast<const ARMBaseRegisterInfo *>(&TRI);
BitVector regsReserved = ARI->getReservedRegs(*MF);
// Check if there is an available register across the sequence that we can
// use.
- for (unsigned Reg : ARM::rGPRRegClass) {
+ for (Register Reg : ARM::rGPRRegClass) {
if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
Reg != ARM::LR && // LR is not reserved, but don't use it.
Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
- C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
+ C.isAvailableInsideSeq(Reg, TRI))
return Reg;
}
-
- // No suitable register. Return 0.
- return 0u;
+ return Register();
}
// Compute liveness of LR at the point after the interval [I, E), which
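With the return type changed to Register, the "no register" case is Register() rather than the literal 0; Register converts to unsigned, so callers can still test it directly. Assumed caller shape (sketch, not quoted from the patch):

    Register Reg = findRegisterToSaveLRTo(C);
    if (Reg) {
      // A free rGPR exists: stash LR there around the outlined call.
    } else {
      // None available: fall back to saving/restoring LR on the stack.
    }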
@@ -5833,9 +5861,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Compute liveness information for each candidate, and set FlagsSetInAll.
const TargetRegisterInfo &TRI = getRegisterInfo();
- std::for_each(
- RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+ for (outliner::Candidate &C : RepeatedSequenceLocs)
+ FlagsSetInAll &= C.Flags;
// According to the ARM Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -5854,9 +5881,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// to compute liveness here.
if (C.Flags & UnsafeRegsDead)
return false;
- C.initLRU(TRI);
- LiveRegUnits LRU = C.LRU;
- return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+ return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
};
// Are there any candidates where those registers are live?
@@ -5969,7 +5994,6 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
for (outliner::Candidate &C : RepeatedSequenceLocs) {
- C.initLRU(TRI);
// LR liveness is overestimated in return blocks, unless they end with a
// tail call.
const auto Last = C.getMBB()->rbegin();
@@ -5977,7 +6001,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
C.getMBB()->isReturnBlock() && !Last->isCall()
? isLRAvailable(TRI, Last,
(MachineBasicBlock::reverse_iterator)C.front())
- : C.LRU.available(ARM::LR);
+ : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
if (LRIsAvailable) {
FrameID = MachineOutlinerNoLRSave;
NumBytesNoStackCalls += Costs.CallNoLRSave;
@@ -5996,7 +6020,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Is SP used in the sequence at all? If not, we don't have to modify
// the stack, so we are guaranteed to get the same frame.
- else if (C.UsedInSequence.available(ARM::SP)) {
+ else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
NumBytesNoStackCalls += Costs.CallDefault;
C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
CandidatesWithoutStackFixups.push_back(C);
@@ -6189,8 +6213,8 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
LiveRegUnits LRU(getRegisterInfo());
- std::for_each(MBB.rbegin(), MBB.rend(),
- [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+ for (MachineInstr &MI : llvm::reverse(MBB))
+ LRU.accumulate(MI);
// Check if each of the unsafe registers are available...
bool R12AvailableInBlock = LRU.available(ARM::R12);
@@ -6635,7 +6659,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
- MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineFunction &MF, outliner::Candidate &C) const {
MachineInstrBuilder MIB;
MachineBasicBlock::iterator CallPt;
unsigned Opc;
@@ -6726,3 +6750,122 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
: ARM::BLX_pred;
}
+namespace {
+class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+ MachineInstr *EndLoop, *LoopCount;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+
+ // Meanings of the various stuff with loop types:
+ // t2Bcc:
+ // EndLoop = branch at end of original BB that will become a kernel
+ // LoopCount = CC setter live into branch
+ // t2LoopEnd:
+ // EndLoop = branch at end of original BB
+ // LoopCount = t2LoopDec
+public:
+ ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
+ : EndLoop(EndLoop), LoopCount(LoopCount),
+ MF(EndLoop->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()) {}
+
+ bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+ // Only ignore the terminator.
+ return MI == EndLoop || MI == LoopCount;
+ }
+
+ Optional<bool> createTripCountGreaterCondition(
+ int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) override {
+
+ if (isCondBranchOpcode(EndLoop->getOpcode())) {
+ Cond.push_back(EndLoop->getOperand(1));
+ Cond.push_back(EndLoop->getOperand(2));
+ if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
+ TII->reverseBranchCondition(Cond);
+ }
+ return {};
+ } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
+ // General case just lets the unrolled t2LoopDec do the subtraction and
+ // therefore just needs to check if zero has been reached.
+ MachineInstr *LoopDec = nullptr;
+ for (auto &I : MBB.instrs())
+ if (I.getOpcode() == ARM::t2LoopDec)
+ LoopDec = &I;
+ assert(LoopDec && "Unable to find copied LoopDec");
+ // Check if we're done with the loop.
+ BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
+ .addReg(LoopDec->getOperand(0).getReg())
+ .addImm(0)
+ .addImm(ARMCC::AL)
+ .addReg(ARM::NoRegister);
+ Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
+ Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+ return {};
+ } else
+ llvm_unreachable("Unknown EndLoop");
+ }
+
+ void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+ void adjustTripCount(int TripCountAdjust) override {}
+
+ void disposed() override {}
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+ MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+ if (Preheader == LoopBB)
+ Preheader = *std::next(LoopBB->pred_begin());
+
+ if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
+ // If the branch is a Bcc, then the CPSR should be set somewhere within the
+ // block. We need to determine the reaching definition of CPSR so that
+ // it can be marked as non-pipelineable, allowing the pipeliner to force
+ // it into stage 0 or give up if it cannot or will not do so.
+ MachineInstr *CCSetter = nullptr;
+ for (auto &L : LoopBB->instrs()) {
+ if (L.isCall())
+ return nullptr;
+ if (isCPSRDefined(L))
+ CCSetter = &L;
+ }
+ if (CCSetter)
+ return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
+ else
+ return nullptr; // Unable to find the CC setter, so unable to guarantee
+ // that pipeline will work
+ }
+
+ // Recognize:
+ // preheader:
+ // %1 = t2DoLoopStart %0
+ // loop:
+ // %2 = phi %1, <not loop>, %..., %loop
+ // %3 = t2LoopDec %2, <imm>
+ // t2LoopEnd %3, %loop
+
+ if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
+ for (auto &L : LoopBB->instrs())
+ if (L.isCall())
+ return nullptr;
+ else if (isVCTP(&L))
+ return nullptr;
+ Register LoopDecResult = I->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+ MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
+ if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
+ return nullptr;
+ MachineInstr *LoopStart = nullptr;
+ for (auto &J : Preheader->instrs())
+ if (J.getOpcode() == ARM::t2DoLoopStart)
+ LoopStart = &J;
+ if (!LoopStart)
+ return nullptr;
+ return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
+ }
+ return nullptr;
+}
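The hook above hands the MachinePipeliner just enough structure to software-pipeline the block. A rough driver-side sketch of how the returned object is consumed (an assumption based on the TargetInstrInfo::PipelinerLoopInfo interface; the real logic lives in the generic MachinePipeliner, not in this patch):

    if (std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> PLI =
            TII->analyzeLoopForPipelining(LoopBB)) {
      for (MachineInstr &MI : *LoopBB)
        if (PLI->shouldIgnoreForPipelining(&MI))
          continue; // branch / induction update stay out of the schedule
      // ... build the pipelined kernel, prologue, and epilogue ...
    }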
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index defce07dd862..3b8f3403e3c3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -360,7 +360,7 @@ public:
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const override;
+ outliner::Candidate &C) const override;
/// Enable outlining by default at -Oz.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
@@ -372,10 +372,15 @@ public:
MI->getOpcode() == ARM::t2WhileLoopStartTP;
}
+ /// Analyze loop L, which must be a single-basic-block loop, and if the
+ /// conditions can be understood enough produce a PipelinerLoopInfo object.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
- unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+ Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
/// Adds an instruction which saves the link register on top of the stack into
/// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true,
@@ -752,6 +757,26 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
return true;
}
+static inline bool isSEHInstruction(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case ARM::SEH_StackAlloc:
+ case ARM::SEH_SaveRegs:
+ case ARM::SEH_SaveRegs_Ret:
+ case ARM::SEH_SaveSP:
+ case ARM::SEH_SaveFRegs:
+ case ARM::SEH_SaveLR:
+ case ARM::SEH_Nop:
+ case ARM::SEH_Nop_Ret:
+ case ARM::SEH_PrologEnd:
+ case ARM::SEH_EpilogStart:
+ case ARM::SEH_EpilogEnd:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c543d02ff75a..1d0e743b94db 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -63,28 +63,26 @@ const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
bool UseSplitPush = STI.splitFramePushPop(*MF);
- const MCPhysReg *RegList =
- STI.isTargetDarwin()
- ? CSR_iOS_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList);
-
const Function &F = MF->getFunction();
+
if (F.getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
+ } else if (STI.splitFramePointerPush(*MF)) {
+ return CSR_Win_SplitFP_SaveList;
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
return STI.isTargetDarwin()
? CSR_iOS_SwiftTail_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList
+ : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
: CSR_AAPCS_SwiftTail_SaveList);
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
@@ -101,7 +99,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SwiftError_SaveList;
- return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList :
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
CSR_AAPCS_SwiftError_SaveList;
}
@@ -109,7 +107,15 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
? CSR_iOS_CXX_TLS_PE_SaveList
: CSR_iOS_CXX_TLS_SaveList;
- return RegList;
+
+ if (STI.isTargetDarwin())
+ return CSR_iOS_SaveList;
+
+ if (UseSplitPush)
+ return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+ : CSR_ATPCS_SplitPush_SaveList;
+
+ return CSR_AAPCS_SaveList;
}
const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
@@ -238,7 +244,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF,
BitVector Reserved(getNumRegs());
markSuperRegs(Reserved, ARM::PC);
- if (TFI->hasFP(MF))
+ if (TFI->isFPReserved(MF))
markSuperRegs(Reserved, STI.getFramePointerReg());
if (hasBasePointer(MF))
markSuperRegs(Reserved, BasePtr);
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 57d7842c63ca..73ed300ccff4 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -43,7 +43,7 @@ namespace ARMRI {
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
@@ -53,25 +53,52 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
return true;
case R8: case R9: case R10: case R11: case R12:
// For iOS we want r7 and lr to be next to each other.
- return !isIOS;
+ return !SplitFramePushPop;
default:
return false;
}
}
-static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
case R8: case R9: case R10: case R11: case R12:
// iOS has this second area.
- return isIOS;
+ return SplitFramePushPop;
default:
return false;
}
}
-static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
+static inline bool isSplitFPArea1Register(unsigned Reg,
+ bool SplitFramePushPop) {
+ using namespace ARM;
+
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case R8: case R9: case R10: case R12:
+ case SP: case PC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool isSplitFPArea2Register(unsigned Reg,
+ bool SplitFramePushPop) {
+ using namespace ARM;
+
+ switch (Reg) {
+ case R11: case LR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
@@ -214,6 +241,8 @@ public:
unsigned DefSubReg,
const TargetRegisterClass *SrcRC,
unsigned SrcSubReg) const override;
+
+ int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index ddbd6702e528..b2d291bbe7ff 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -16,6 +16,7 @@
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
#include "MVETailPredUtils.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -212,7 +213,7 @@ bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) {
bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
if (!ST.hasLOB())
return false;
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index a6dbe563a4ab..d14424c2deca 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -284,19 +284,32 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
// The order of callee-saved registers needs to match the order we actually push
// them in FrameLowering, because this order is what's used by
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
-// pointer, we use this AAPCS alternative.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+// pointer, we use this ATPCS alternative.
+def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
R11, R10, R9, R8,
(sequence "D%u", 15, 8))>;
+def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8),
+ LR, R11)>;
+
// R8 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R8)>;
// R10 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;
+// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
+// pointer even for Thumb targets, where split pushes are necessary.
+// This AAPCS alternative makes sure the frame index slots match the push
+// order in that case.
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
+ R7, R6, R5, R4,
+ R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
+
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
// be partially modelled by treating R0 as a callee-saved register
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index a2a4f1f3bdfd..d77c3afd05e5 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -396,7 +396,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlign().value() << " bytes *****\n");
- STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ STI = &MF->getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
isPositionIndependentOrROPI =
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2f083561bbd4..613904f702f0 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -2107,6 +2108,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::TCRETURNdi:
case ARM::TCRETURNri: {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+ MBBI--;
+ if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+ MBBI--;
assert(MBBI->isReturn() &&
"Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
@@ -2116,13 +2121,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+ MBBI--;
+ if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+ MBBI--;
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
+ MachineFunction *MF = MBB.getParent();
+ bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF->getFunction().needsUnwindTableEntry();
unsigned TCOpcode =
STI->isThumb()
- ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
+ : ARM::tTAILJMPdND)
: ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
@@ -3132,7 +3145,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 5d94b99d4c5d..a167225e2743 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -122,8 +122,7 @@ class ARMFastISel final : public FastISel {
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo),
- Subtarget(
- &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
+ Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
TLI(*Subtarget->getTargetLowering()) {
@@ -156,7 +155,7 @@ class ARMFastISel final : public FastISel {
const LoadInst *LI) override;
bool fastLowerArguments() override;
- #include "ARMGenFastISel.inc"
+#include "ARMGenFastISel.inc"
// Instruction selection routines.
@@ -189,10 +188,10 @@ class ARMFastISel final : public FastISel {
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
- unsigned Alignment = 0, bool isZExt = true,
+ MaybeAlign Alignment = None, bool isZExt = true,
bool allocReg = true);
bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment = 0);
+ MaybeAlign Alignment = None);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -602,8 +601,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
- (Subtarget->isTargetMachO() && IsIndirect) ||
- Subtarget->genLongCalls()) {
+ (Subtarget->isTargetMachO() && IsIndirect)) {
MachineInstrBuilder MIB;
Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -898,7 +896,8 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
- unsigned Alignment, bool isZExt, bool allocReg) {
+ MaybeAlign Alignment, bool isZExt,
+ bool allocReg) {
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
@@ -924,7 +923,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i16:
- if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(2) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -939,7 +939,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i32:
- if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(4) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -955,7 +956,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
case MVT::f32:
if (!Subtarget->hasVFP2Base()) return false;
// Unaligned loads need special handling. Floats require word-alignment.
- if (Alignment && Alignment < 4) {
+ if (Alignment && *Alignment < Align(4)) {
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
@@ -970,7 +971,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned loads need special handling. Doublewords require
// word-alignment.
- if (Alignment && Alignment < 4)
+ if (Alignment && *Alignment < Align(4))
return false;
Opc = ARM::VLDRD;
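The hunks above replace the raw `unsigned Alignment` parameters with MaybeAlign, so an absent alignment now reads as "unknown" rather than as zero. A minimal sketch of the resulting comparison idiom, assuming only the llvm/Support/Alignment.h helpers already used in these hunks:

#include "llvm/Support/Alignment.h"

// An unset MaybeAlign (None) never blocks the fast path; only a *known*
// alignment smaller than the access width does.
static bool isKnownUnderAligned(llvm::MaybeAlign A) {
  return A && *A < llvm::Align(4);
}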
@@ -1030,14 +1031,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
Register ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
return false;
updateValueMap(I, ResultReg);
return true;
}
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment) {
+ MaybeAlign Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.SimpleTy) {
@@ -1065,7 +1066,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i16:
- if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(2) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -1079,7 +1081,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i32:
- if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(4) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -1094,7 +1097,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
case MVT::f32:
if (!Subtarget->hasVFP2Base()) return false;
// Unaligned stores need special handling. Floats require word-alignment.
- if (Alignment && Alignment < 4) {
+ if (Alignment && *Alignment < Align(4)) {
Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVRS), MoveReg)
@@ -1111,8 +1114,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned stores need special handling. Doublewords require
// word-alignment.
- if (Alignment && Alignment < 4)
- return false;
+ if (Alignment && *Alignment < Align(4))
+ return false;
StrOpc = ARM::VSTRD;
break;
@@ -1166,7 +1169,7 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
return false;
return true;
}
@@ -2939,7 +2942,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
Register ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
return false;
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
new file mode 100644
index 000000000000..77c8f7134a55
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
@@ -0,0 +1,432 @@
+//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass works around a Cortex Core Fused AES erratum:
+// - Cortex-A57 Erratum 1742098
+// - Cortex-A72 Erratum 1655431
+//
+// The erratum may be triggered if an input vector register to AESE or AESD was
+// last written by an instruction that only updated 32 bits of it. This can
+// occur for either of the input registers.
+//
+// The workaround chosen is to update the input register using `r = VORRq r, r`,
+// as this updates all 128 bits of the register unconditionally, but does not
+// change the values observed in `r`, making the input safe.
+//
+// This pass has to be conservative in a few cases:
+// - an input vector register to the AES instruction is defined outside the
+// current function, where we have to assume the register was updated in an
+// unsafe way; and
+// - an input vector register to the AES instruction is updated along multiple
+// different control-flow paths, where we have to ensure all the register
+// updating instructions are safe.
+//
+// Both of these cases may apply to an input vector register. In either case, we
+// need to ensure that, when the pass is finished, there exists a safe
+// instruction between every unsafe register updating instruction and the AES
+// instruction.
+//
+//===----------------------------------------------------------------------===//
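The per-operand decision this comment describes can be read as a small decision procedure. A self-contained sketch of that logic, with names invented for illustration (the pass itself derives these inputs from ReachingDefAnalysis over MachineInstrs):

#include <cstddef>

enum class FixupPlacement { None, AtFunctionEntry, AtUse, AfterSingleDef };

// NumDefs: reaching definitions of one AES input register; NumUnsafeDefs:
// those that may have written only 32 bits of it.
static FixupPlacement planFixup(bool IsLiveIn, std::size_t NumDefs,
                                std::size_t NumUnsafeDefs) {
  if (!IsLiveIn && NumDefs == 0)
    return FixupPlacement::AtUse; // No information at all: be conservative.
  if (NumUnsafeDefs == 0)
    return IsLiveIn ? FixupPlacement::AtFunctionEntry : FixupPlacement::None;
  if (IsLiveIn || NumUnsafeDefs > 1)
    return FixupPlacement::AtUse; // One fixup at the use beats one per def.
  return FixupPlacement::AfterSingleDef; // Hoist next to the lone unsafe def.
}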
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "Utils/ARMBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <assert.h>
+#include <stdint.h>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
+public:
+ static char ID;
+ explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
+ initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "ARM fix for Cortex-A57 AES Erratum 1742098";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ReachingDefAnalysis>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // This is the information needed to insert the fixup in the right place.
+ struct AESFixupLocation {
+ MachineBasicBlock *Block;
+ // The fixup instruction will be inserted *before* InsertionPt.
+ MachineInstr *InsertionPt;
+ MachineOperand *MOp;
+ };
+
+ void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
+
+ void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const;
+
+ static bool isFirstAESPairInstr(MachineInstr &MI);
+ static bool isSafeAESInput(MachineInstr &MI);
+};
+char ARMFixCortexA57AES1742098::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
+INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
+
+//===----------------------------------------------------------------------===//
+
+bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == ARM::AESD || Opc == ARM::AESE;
+}
+
+bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
+ auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
+ int CCIdx = MI.findFirstPredOperandIdx();
+ if (CCIdx == -1)
+ return false;
+ return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
+ };
+
+ switch (MI.getOpcode()) {
+ // Unknown: Assume not safe.
+ default:
+ return false;
+ // 128-bit wide AES instructions
+ case ARM::AESD:
+ case ARM::AESE:
+ case ARM::AESMC:
+ case ARM::AESIMC:
+ // No CondCode.
+ return true;
+ // 128-bit and 64-bit wide bitwise ops (when condition = al)
+ case ARM::VANDd:
+ case ARM::VANDq:
+ case ARM::VORRd:
+ case ARM::VORRq:
+ case ARM::VEORd:
+ case ARM::VEORq:
+ case ARM::VMVNd:
+ case ARM::VMVNq:
+ // VMOV of 64-bit value between D registers (when condition = al)
+ case ARM::VMOVD:
+ // VMOV of 64 bit value from GPRs (when condition = al)
+ case ARM::VMOVDRR:
+ // VMOV of immediate into D or Q registers (when condition = al)
+ case ARM::VMOVv2i64:
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv4f32:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ // Loads (when condition = al)
+ // VLD Dn, [Rn, #imm]
+ case ARM::VLDRD:
+ // VLDM
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDIA:
+ // VLDn to all lanes.
+ case ARM::VLD1d64:
+ case ARM::VLD1q64:
+ case ARM::VLD1d32:
+ case ARM::VLD1q32:
+ case ARM::VLD2b32:
+ case ARM::VLD2d32:
+ case ARM::VLD2q32:
+ case ARM::VLD1d16:
+ case ARM::VLD1q16:
+ case ARM::VLD2d16:
+ case ARM::VLD2q16:
+ case ARM::VLD1d8:
+ case ARM::VLD1q8:
+ case ARM::VLD2b8:
+ case ARM::VLD2d8:
+ case ARM::VLD2q8:
+ case ARM::VLD3d32:
+ case ARM::VLD3q32:
+ case ARM::VLD3d16:
+ case ARM::VLD3q16:
+ case ARM::VLD3d8:
+ case ARM::VLD3q8:
+ case ARM::VLD4d32:
+ case ARM::VLD4q32:
+ case ARM::VLD4d16:
+ case ARM::VLD4q16:
+ case ARM::VLD4d8:
+ case ARM::VLD4q8:
+ // VLD1 (single element to one lane)
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd16_UPD:
+ // VLD1 (single element to all lanes)
+ case ARM::VLD1DUPd32:
+ case ARM::VLD1DUPd32wb_fixed:
+ case ARM::VLD1DUPd32wb_register:
+ case ARM::VLD1DUPd16:
+ case ARM::VLD1DUPd16wb_fixed:
+ case ARM::VLD1DUPd16wb_register:
+ case ARM::VLD1DUPd8:
+ case ARM::VLD1DUPd8wb_fixed:
+ case ARM::VLD1DUPd8wb_register:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ // VMOV
+ case ARM::VSETLNi32:
+ case ARM::VSETLNi16:
+ case ARM::VSETLNi8:
+ return CondCodeIsAL(MI);
+ };
+
+ return false;
+}
+
+bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
+ auto &STI = F.getSubtarget<ARMSubtarget>();
+
+ // Fix not requested or AES instructions not present: skip pass.
+ if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
+ return false;
+
+ const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
+ const ARMBaseInstrInfo *TII = STI.getInstrInfo();
+
+ auto &RDA = getAnalysis<ReachingDefAnalysis>();
+
+ // Analyze whole function to find instructions which need fixing up...
+ SmallVector<AESFixupLocation> FixupLocsForFn{};
+ analyzeMF(F, RDA, TRI, FixupLocsForFn);
+
+ // ... and fix the instructions up all at the same time.
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
+ for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
+ insertAESFixup(FixupLoc, TII, TRI);
+ Changed |= true;
+ }
+
+ return Changed;
+}
+
+void ARMFixCortexA57AES1742098::analyzeMF(
+ MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
+ unsigned MaxAllowedFixups = 0;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isFirstAESPairInstr(MI))
+ continue;
+
+ // Found an instruction to check the operands of.
+ LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
+ assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
+ "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
+
+ // A maximum of two fixups should be inserted for each AES pair (one per
+ // register use).
+ MaxAllowedFixups += 2;
+
+ // Inspect all operands, choosing whether to insert a fixup.
+ for (MachineOperand &MOp : MI.uses()) {
+ SmallPtrSet<MachineInstr *, 1> AllDefs{};
+ RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
+
+ // Planned Fixup: This should be added to FixupLocsForFn at most once.
+ AESFixupLocation NewLoc{&MBB, &MI, &MOp};
+
+ // In small functions with loops, this operand may be both a live-in and
+ // have definitions within the function itself. These will need a fixup.
+ bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
+
+ // If the register doesn't have defining instructions, and is not a
+ // live-in, then something is wrong and the fixup must always be
+ // inserted to be safe.
+ if (!IsLiveIn && AllDefs.size() == 0) {
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: No Defining Instrs found, not live-in: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ auto IsUnsafe = [](MachineInstr *MI) -> bool {
+ return !isSafeAESInput(*MI);
+ };
+ size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
+
+ // If there are no unsafe definitions...
+ if (UnsafeCount == 0) {
+ // ... and the register is not live-in ...
+ if (!IsLiveIn) {
+ // ... then skip the fixup.
+ LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ continue;
+ }
+
+ // Otherwise, the only unsafe "definition" is a live-in, so insert the
+ // fixup at the start of the function.
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: Live-In (with safe defining instrs): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ NewLoc.Block = &MF.front();
+ NewLoc.InsertionPt = &*NewLoc.Block->begin();
+ LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
+ << *NewLoc.InsertionPt);
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ // If a fixup is needed in more than one place, then the best place to
+ // insert it is adjacent to the use rather than introducing a fixup
+ // adjacent to each def.
+ //
+ // FIXME: It might be better to hoist this to the start of the BB, if
+ // possible.
+ if (IsLiveIn || UnsafeCount > 1) {
+ LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
+ "(including live-ins): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+      assert(UnsafeCount == 1 && !IsLiveIn &&
+             "At this point, there should be exactly one unsafe defining "
+             "instruction, and the defined register should not be a live-in.");
+ SmallPtrSetIterator<MachineInstr *> It =
+ llvm::find_if(AllDefs, IsUnsafe);
+ assert(It != AllDefs.end() &&
+ "UnsafeCount == 1 but No Unsafe MachineInstr found.");
+ MachineInstr *DefMI = *It;
+
+      LLVM_DEBUG(
+          dbgs() << "Fixup Planned: Found single unsafe defining instr for "
+                 << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
+
+ // There is one unsafe defining instruction, which needs a fixup. It is
+ // generally good to hoist the fixup to be adjacent to the defining
+ // instruction rather than the using instruction, as the using
+ // instruction may be inside a loop when the defining instruction is
+ // not.
+ MachineBasicBlock::iterator DefIt = DefMI;
+ ++DefIt;
+ if (DefIt != DefMI->getParent()->end()) {
+ LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
+ << "And immediately before " << *DefIt);
+ NewLoc.Block = DefIt->getParent();
+ NewLoc.InsertionPt = &*DefIt;
+ }
+
+ FixupLocsForFn.emplace_back(NewLoc);
+ }
+ }
+ }
+
+ assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
+ "Inserted too many fixups for this function.");
+ (void)MaxAllowedFixups;
+}
+
+void ARMFixCortexA57AES1742098::insertAESFixup(
+ AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const {
+ MachineOperand *OperandToFixup = FixupLoc.MOp;
+
+ assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
+ Register RegToFixup = OperandToFixup->getReg();
+
+ LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
+ << " before: " << *FixupLoc.InsertionPt);
+
+ // Insert the new `VORRq qN, qN, qN`. There are a few details here:
+ //
+  // The uses are marked as killed, even if the original use of OperandToFixup
+  // is not killed, as the new instruction is clobbering the register. This is
+  // safe even if there are other uses of `qN`, as the VORRq is value-wise a
+  // no-op (it is inserted for microarchitectural reasons).
+ //
+ // The def and the uses are still marked as Renamable if the original register
+ // was, to avoid having to rummage through all the other uses and defs and
+ // unset their renamable bits.
+ unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
+ BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(RegToFixup, RegState::Define | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addImm((uint64_t)ARMCC::AL)
+ .addReg(ARM::NoRegister);
+}
+
+// Factory function used by ARMTargetMachine to add the pass to
+// the pass manager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
+ return new ARMFixCortexA57AES1742098();
+}
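For reference, this factory is what the ARM pass pipeline invokes; a sketch of the registration site follows, with the exact ARMPassConfig hook assumed rather than taken from this diff:

// Assumed hook; the real pipeline may register the pass elsewhere.
void ARMPassConfig::addPreEmitPass() {
  addPass(createARMFixCortexA57AES1742098Pass());
}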
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 1f2f6f7497e0..48b4d266b41a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,8 @@
// | |
// |-----------------------------------|
// | |
-// | prev_fp, prev_lr |
+// | prev_lr |
+// | prev_fp |
// | (a.k.a. "frame record") |
// | |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
@@ -138,6 +139,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -210,6 +212,12 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken());
}
+/// isFPReserved - Return true if the frame pointer register should be
+/// considered a reserved register in the scope of the specified function.
+bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
+ return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
+}
+
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
@@ -272,6 +280,230 @@ static int getArgumentStackToRestore(MachineFunction &MF,
return ArgumentPopSize;
}
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
+// Given a load or store instruction, generate the appropriate SEH unwind
+// opcode for it on Windows.
+static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ unsigned Flags) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ MachineInstrBuilder MIB;
+ const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ Flags |= MachineInstr::NoMerge;
+
+ switch (Opc) {
+ default:
+ report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
+ break;
+ case ARM::t2ADDri: // add.w r11, sp, #xx
+ case ARM::t2ADDri12: // add.w r11, sp, #xx
+ case ARM::t2MOVTi16: // movt r4, #xx
+ case ARM::tBL: // bl __chkstk
+ // These are harmless if used for just setting up a frame pointer,
+ // but that frame pointer can't be relied upon for unwinding, unless
+ // set up with SEH_SaveSP.
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2MOVi16: { // mov(w) r4, #xx
+ bool Wide = MBBI->getOperand(1).getImm() >= 256;
+ if (!Wide) {
+ MachineInstrBuilder NewInstr =
+ BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
+ NewInstr.add(MBBI->getOperand(0));
+ NewInstr.add(t1CondCodeOp(/*isDead=*/true));
+ for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
+ NewInstr.add(MBBI->getOperand(i));
+ MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
+ MBB->erase(MBBI);
+ MBBI = NewMBBI;
+ }
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
+ break;
+ }
+
+ case ARM::tBLXr: // blx r12 (__chkstk)
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2MOVi32imm: // movw+movt
+ // This pseudo instruction expands into two mov instructions. If the
+ // second operand is a symbol reference, this will stay as two wide
+ // instructions, movw+movt. If they're immediates, the first one can
+ // end up as a narrow mov though.
+ // As two SEH instructions are appended here, they won't get interleaved
+ // between the two final movw/movt instructions, but it doesn't make any
+ // practical difference.
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ MBB->insertAfter(MBBI, MIB);
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned Mask = 0;
+ bool Wide = false;
+ for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
+ if (Reg == 15)
+ Reg = 14;
+ if (Reg >= 8 && Reg <= 13)
+ Wide = true;
+ else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
+ Wide = true;
+ Mask |= 1 << Reg;
+ }
+ if (!Wide) {
+ unsigned NewOpc;
+ switch (Opc) {
+ case ARM::t2LDMIA_RET:
+ NewOpc = ARM::tPOP_RET;
+ break;
+ case ARM::t2LDMIA_UPD:
+ NewOpc = ARM::tPOP;
+ break;
+ case ARM::t2STMDB_UPD:
+ NewOpc = ARM::tPUSH;
+ break;
+ default:
+ llvm_unreachable("");
+ }
+ MachineInstrBuilder NewInstr =
+ BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
+ for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
+ NewInstr.add(MBBI->getOperand(i));
+ MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
+ MBB->erase(MBBI);
+ MBBI = NewMBBI;
+ }
+ unsigned SEHOpc =
+ (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
+ MIB = BuildMI(MF, DL, TII.get(SEHOpc))
+ .addImm(Mask)
+ .addImm(Wide ? 1 : 0)
+ .setMIFlags(Flags);
+ break;
+ }
+ case ARM::VSTMDDB_UPD:
+ case ARM::VLDMDIA_UPD: {
+ int First = -1, Last = 0;
+ for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MBBI->getOperand(i);
+ unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
+ if (First == -1)
+ First = Reg;
+ Last = Reg;
+ }
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
+ .addImm(First)
+ .addImm(Last)
+ .setMIFlags(Flags);
+ break;
+ }
+ case ARM::tSUBspi:
+ case ARM::tADDspi:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+ .addImm(MBBI->getOperand(2).getImm() * 4)
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+ case ARM::t2SUBspImm:
+ case ARM::t2SUBspImm12:
+ case ARM::t2ADDspImm:
+ case ARM::t2ADDspImm12:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+ .addImm(MBBI->getOperand(2).getImm())
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::tMOVr:
+ if (MBBI->getOperand(1).getReg() == ARM::SP &&
+ (Flags & MachineInstr::FrameSetup)) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+ .addImm(Reg)
+ .setMIFlags(Flags);
+ } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
+ (Flags & MachineInstr::FrameDestroy)) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+ .addImm(Reg)
+ .setMIFlags(Flags);
+ } else {
+ report_fatal_error("No SEH Opcode for MOV");
+ }
+ break;
+
+ case ARM::tBX_RET:
+ case ARM::TCRETURNri:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::TCRETURNdi:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+ }
+ return MBB->insertAfter(MBBI, MIB);
+}
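As a worked example of the register-mask computation in the t2LDMIA/t2STMDB case above: for a `pop {r4-r7, pc}` lowered as t2LDMIA_RET, pc (15) is remapped to lr (14) and no register falls in r8-r13, so the narrow tPOP_RET encoding is kept and Wide stays false:

// Mask for {r4, r5, r6, r7, lr}: bits 4-7 plus bit 14.
constexpr unsigned Mask =
    (1u << 4) | (1u << 5) | (1u << 6) | (1u << 7) | (1u << 14);
static_assert(Mask == 0x40F0, "SEH_SaveRegs_Ret mask for pop {r4-r7, pc}");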
+
+static MachineBasicBlock::iterator
+initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
+ if (MBBI == MBB.begin())
+ return MachineBasicBlock::iterator();
+ return std::prev(MBBI);
+}
+
+static void insertSEHRange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Start,
+ const MachineBasicBlock::iterator &End,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (Start.isValid())
+ Start = std::next(Start);
+ else
+ Start = MBB.begin();
+
+ for (auto MI = Start; MI != End;) {
+ auto Next = std::next(MI);
+    // Check if this instruction already has a SEH opcode added. In that
+ // case, don't do this generic mapping.
+ if (Next != End && isSEHInstruction(*Next)) {
+ MI = std::next(Next);
+ while (MI != End && isSEHInstruction(*MI))
+ ++MI;
+ continue;
+ }
+ insertSEH(MI, TII, MIFlags);
+ MI = Next;
+ }
+}
+
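A minimal usage sketch of insertSEHRange, mirroring how the prologue code later in this patch wraps its instructions once they are final (names as in emitPrologue):

// Map every prologue instruction emitted so far onto a SEH opcode, then
// mark the point where the replayable prologue ends.
insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
    .setMIFlag(MachineInstr::FrameSetup);
MF.setHasWinCFI(true);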
static void emitRegPlusImmediate(
bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -392,8 +624,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
const DebugLoc &DL, const unsigned Reg,
const Align Alignment,
const bool MustBeSingleInstruction) {
- const ARMSubtarget &AST =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
const unsigned AlignMask = Alignment.value() - 1U;
const unsigned NrBitsToZero = Log2(Alignment);
@@ -452,15 +683,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
+ const MachineFunction &MF) {
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimate: assume the frame pointer is r7 and that
// r8 up to pc ("r15"), i.e. 8 registers, get spilled before it.
- int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
- return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
+ int MaxRegBytes = 8 * 4;
+ if (STI.splitFramePointerPush(MF)) {
+ // Here, r11 can be stored below all of r4-r15 (3 registers more than
+ // above), plus d8-d15.
+ MaxRegBytes = 11 * 4 + 8 * 8;
+ }
+ int FPCXTSaveSize =
+ (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+ return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}
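To make the split-FP bound concrete, the conservative register-save estimate above works out as follows (a worked calculation, not code from this patch):

// Up to 11 GPR slots can sit above r11's spill slot, plus d8-d15.
constexpr int MaxRegBytes = 11 * 4 + 8 * 8; // 44 + 64 = 108 bytes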
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -482,6 +721,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned NumBytes = MFI.getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int FPCXTSaveSize = 0;
+ bool NeedsWinCFI = needsWinCFI(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -510,47 +750,92 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
}
- DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (!NeedsWinCFI)
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (NeedsWinCFI && MBBI != MBB.begin()) {
+ insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ MF.setHasWinCFI(true);
+ }
return;
}
// Determine spill area sizes.
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF)) {
+ if (STI.splitFramePointerPush(MF)) {
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R11:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
GPRCS2Size += 4;
break;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R12:
+ GPRCS1Size += 4;
+ break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+ DPRCSSize += 8;
+ }
+ }
+ } else {
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.splitFramePushPop(MF)) {
+ GPRCS2Size += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ GPRCS1Size += 4;
+ break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+ DPRCSSize += 8;
}
- LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
}
}
@@ -585,15 +870,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
- unsigned DPRGapSize =
- (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
- DPRAlign.value();
+ unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
+ if (!STI.splitFramePointerPush(MF)) {
+ DPRGapSize += GPRCS2Size;
+ }
+ DPRGapSize %= DPRAlign.value();
- unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
+ unsigned DPRCSOffset;
+ if (STI.splitFramePointerPush(MF)) {
+ DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
+ GPRCS2Offset = DPRCSOffset - GPRCS2Size;
+ } else {
+ DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
+ }
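A worked example of the split-FP offset computation above, under assumed sizes (no FPCXT or argument-register save, NumBytes of 64, GPRCS1 = 7 registers, DPRCS = 2, GPRCS2 = {r11, lr}):

constexpr unsigned FPCXTOffset = 64;                                    // assumed
constexpr unsigned GPRCS1Size = 7 * 4, DPRCSSize = 2 * 8, GPRCS2Size = 2 * 4;
constexpr unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;             // 36
constexpr unsigned DPRGapSize = GPRCS1Size % 8;                         // 4
constexpr unsigned DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; // 16
constexpr unsigned GPRCS2Offset = DPRCSOffset - GPRCS2Size;             // 8
static_assert(GPRCS2Offset == 8, "r11/lr land lowest, next to the locals");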
int FramePtrOffsetInPush = 0;
if (HasFP) {
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
- assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
+ assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
@@ -604,7 +897,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 2.
- if (GPRCS2Size > 0) {
+ if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}
@@ -644,18 +937,37 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else
NumBytes = DPRCSOffset;
+ if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
+ GPRCS2Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ }
+
+ bool NeedsWinCFIStackAlloc = NeedsWinCFI;
+ if (STI.splitFramePointerPush(MF) && HasFP)
+ NeedsWinCFIStackAlloc = false;
+
if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 2;
- if (NumWords < 65536)
+ if (NumWords < 65536) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL));
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ // Split into two instructions here, instead of using t2MOVi32imm,
+ // to allow inserting accurate SEH instructions (including accurate
+ // instruction size for each of them).
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords & 0xffff)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
+ .addReg(ARM::R4)
+ .addImm(NumWords >> 16)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
switch (TM.getCodeModel()) {
case CodeModel::Tiny:
@@ -682,12 +994,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
break;
}
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ MachineInstrBuilder Instr, SEH;
+ Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (NeedsWinCFIStackAlloc) {
+ SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .addImm(/*Wide=*/1)
+ .setMIFlags(MachineInstr::FrameSetup);
+ MBB.insertAfter(Instr, SEH);
+ }
NumBytes = 0;
}
@@ -720,34 +1040,58 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
+  // FIXME: The above is not necessarily true when PACBTI is enabled.
+  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
+  // so FP ends up in area two.
+ MachineBasicBlock::iterator AfterPush;
if (HasFP) {
- MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
- dl, TII, FramePtr, ARM::SP,
- PushSize + FramePtrOffsetInPush,
- MachineInstr::FrameSetup);
- if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true),
- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ int FPOffset = PushSize + FramePtrOffsetInPush;
+ if (STI.splitFramePointerPush(MF)) {
+ AfterPush = std::next(GPRCS2Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
} else {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
- nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, FPOffset,
+ MachineInstr::FrameSetup);
}
+ if (!NeedsWinCFI) {
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Emit a SEH opcode indicating the prologue end. The rest of the prologue
+ // instructions below don't need to be replayed to unwind the stack.
+ if (NeedsWinCFI && MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator End = MBBI;
+ if (HasFP && STI.splitFramePointerPush(MF))
+ End = AfterPush;
+ insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
+ BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ MF.setHasWinCFI(true);
}
// Now that the prologue's actual instructions are finalised, we can insert
// the necessary DWARF cf instructions to describe the situation. Start by
// recording where each register ended up:
- if (GPRCS1Size > 0) {
+ if (GPRCS1Size > 0 && !NeedsWinCFI) {
MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
int CFIIndex;
for (const auto &Entry : CSI) {
@@ -781,7 +1125,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (GPRCS2Size > 0) {
+ if (GPRCS2Size > 0 && !NeedsWinCFI) {
MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
for (const auto &Entry : CSI) {
Register Reg = Entry.getReg();
@@ -807,7 +1151,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (DPRCSSize > 0) {
+ if (DPRCSSize > 0 && !NeedsWinCFI) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
MachineBasicBlock::iterator Pos = std::next(LastPush);
@@ -831,7 +1175,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// throughout the process. If we have a frame pointer, it takes over the job
// half-way through, so only the first few .cfi_def_cfa_offset instructions
// actually get emitted.
- DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (!NeedsWinCFI)
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
@@ -928,7 +1273,14 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ MachineBasicBlock::iterator RangeStart;
if (!AFI->hasStackFrame()) {
+ if (MF.hasWinCFI()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ RangeStart = initMBBRange(MBB, MBBI);
+ }
+
if (NumBytes + IncomingArgStackToRestore != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII,
NumBytes + IncomingArgStackToRestore,
@@ -944,6 +1296,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
++MBBI;
}
+ if (MF.hasWinCFI()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ RangeStart = initMBBRange(MBB, MBBI);
+ }
+
// Move SP to start of FP callee save spill area.
NumBytes -= (ReservedArgStack +
AFI->getFPCXTSaveAreaSize() +
@@ -998,6 +1356,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
// Increment past our save areas.
+ if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
+ MBBI++;
+
if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
MBBI++;
// Since vpop register list cannot have gaps, there may be multiple vpop
@@ -1012,7 +1373,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}
- if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
+ MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
if (ReservedArgStack || IncomingArgStackToRestore) {
@@ -1030,6 +1392,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
}
+
+ if (MF.hasWinCFI()) {
+ insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1245,7 +1613,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
- STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
+ STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
+ !STI.splitFramePointerPush(MF)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
@@ -1609,12 +1978,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
.addImm(-4)
.add(predOps(ARMCC::AL));
}
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
- MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
- MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ if (STI.splitFramePointerPush(MF)) {
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
+ &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
+ &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
+ } else {
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ }
// The code above does not insert spill code for the aligned DPRCS2 registers.
// The stack realignment code will be inserted between the push instructions
@@ -1642,14 +2020,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
- unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
+ unsigned LdrOpc =
+ AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register, 0);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register, 0);
+ if (STI.splitFramePointerPush(MF)) {
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isSplitFPArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isSplitFPArea1Register, 0);
+ } else {
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register, 0);
+ }
return true;
}
@@ -1768,7 +2156,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
return;
// We are planning to use NEON instructions vst1 / vld1.
- if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
+ if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
return;
// Don't bother if the default stack alignment is sufficiently high.
@@ -1818,6 +2206,34 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
return true;
}
+static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ return Subtarget.createAAPCSFrameChainLeaf() ||
+ (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
+}
+
+// Thumb1 may require a spill when storing to a frame index through FP, for
+// cases where FP is a high register (R11). This scans the function for cases
+// where this may happen.
+static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
+ const TargetFrameLowering &TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumb1OnlyFunction())
+ return false;
+
+ for (const auto &MBB : MF)
+ for (const auto &MI : MBB)
+ if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
+ for (const auto &Op : MI.operands())
+ if (Op.isFI()) {
+ Register Reg;
+ TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
+ if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
+ return true;
+ }
+ return false;
+}
+
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -1826,7 +2242,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// to take advantage of the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
// to combine multiple loads / stores.
- bool CanEliminateFrame = true;
+ bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
bool CS1Spilled = false;
bool LRSpilled = false;
unsigned NumGPRSpills = 0;
@@ -2021,6 +2437,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Functions with VLAs or extremely large call frames are rare, and
// if a function is allocating more than 1KB of stack, an extra 4-byte
// slot probably isn't relevant.
+ //
+ // A special case is the scenario where r11 is used as FP, where accesses
+ // to a frame index will require its value to be moved into a low reg.
+ // This is handled later on, once we are able to determine if we have any
+ // fp-relative accesses.
if (RegInfo->hasBasePointer(MF))
EstimatedRSStackSizeLimit = (1U << 5) * 4;
else
@@ -2049,7 +2470,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
//
// We could do slightly better on Thumb1; in some cases, an sp-relative
// offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(STI, *AFI);
+ int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
bool HasLargeArgumentList =
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
@@ -2067,7 +2488,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ if ((requiresAAPCSFrameRecord(MF) ||
+ MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+ !LRSpilled) {
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
@@ -2149,7 +2572,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// r7 can be used if it is not being used as the frame pointer.
- if (!HasFP) {
+ if (!HasFP || FramePtr != ARM::R7) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
@@ -2270,8 +2693,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
- // adjustments also, even when the frame itself is small.
- if (BigFrameOffsets && !ExtraCSSpill) {
+ // adjustments and for frame index accesses when FP is a high register,
+ // even when the frame itself is small.
+ if (!ExtraCSSpill &&
+ (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign.value() / 4;
@@ -2488,6 +2913,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
unsigned CFIIndex;
const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
bool Thumb = ST->isThumb();
+ bool Thumb2 = ST->isThumb2();
// Sadly, this currently doesn't support varargs, or platforms other than
// android/linux. Note that thumb1/thumb2 are supported for android/linux.
@@ -2505,19 +2931,10 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
- uint64_t StackSize = MFI.getStackSize();
-
- // Do not generate a prologue for leaf functions with a stack of size zero.
- // For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807. This function could also
- // take the address of a non-split function. When the linker tries to adjust
- // its non-existent prologue, it would fail with an error. Mark the object
- // file so that such failures are not errors. See this Go language bug-report
- // https://go-review.googlesource.com/c/go/+/148819/
- if (StackSize == 0 && !MFI.hasTailCall()) {
- MF.getMMI().setHasNosplitStack(true);
+ if (!MFI.needsSplitStackProlog())
return;
- }
+
+ uint64_t StackSize = MFI.getStackSize();
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
@@ -2570,8 +2987,9 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Make sure the LiveIns are still sorted and unique.
MBB->sortUniqueLiveIns();
// Replace the edges to PrologueMBB by edges to the sequences
- // we are about to add.
- MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ // we are about to add, but only update for immediate predecessors.
+ if (MBB->isSuccessor(&PrologueMBB))
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
}
// The required stack size, aligned to the ARM constant criterion.
@@ -2604,17 +3022,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the relevant DWARF information about the change in stack pointer as
// well as where to find both r4 and r5 (the callee-save registers)
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// mov SR1, sp
if (Thumb) {
@@ -2630,17 +3050,46 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// sub SR1, sp, #StackSize
if (!CompareStackPointer && Thumb) {
- BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
- .add(condCodeOp())
- .addReg(ScratchReg1)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL));
+ if (AlignedStackSize < 256) {
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
+ } else {
+ if (Thumb2) {
+ BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ .addImm(AlignedStackSize);
+ } else {
+ auto MBBI = McrMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL));
+ }
} else if (!CompareStackPointer) {
- BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
- .addReg(ARM::SP)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (AlignedStackSize < 256) {
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = McrMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ }
}
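// Why the 256 cutoff (the encoding limits behind the branches above):
// tSUBi8 and tMOVi8 only carry an 8-bit immediate (0-255), and the ARM-mode
// path applies the same bound for simplicity. Larger sizes are materialized
// into a scratch register first -- t2MOVi32imm when Thumb2 is available,
// otherwise a constant-pool load via emitLoadConstPool -- for example:
//   ldr  r4, =AlignedStackSize   ; constant-pool load into ScratchReg0
//   sub  r5, sp, r4              ; SR1 = sp - size (SUBrr/tSUBrr)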
if (Thumb && ST->isThumb1Only()) {
@@ -2707,28 +3156,69 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Pass the first argument to __morestack in scratch register #0:
// the amount of stack required.
if (Thumb) {
- BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
- .add(condCodeOp())
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL));
+ if (AlignedStackSize < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+ .add(condCodeOp())
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
+ } else {
+ if (Thumb2) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ .addImm(AlignedStackSize);
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
+ }
} else {
- BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (AlignedStackSize < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
}
+
// Pass the second argument to __morestack in scratch register #1:
// the amount of stack consumed to save function arguments.
if (Thumb) {
- BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
- .add(condCodeOp())
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
- .add(predOps(ARMCC::AL));
+ if (ARMFI->getArgumentStackSize() < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+ .add(condCodeOp())
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL));
+ } else {
+ if (Thumb2) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(
+ *AllocMBB, MBBI, DL, ScratchReg1, 0,
+ alignToARMConstant(ARMFI->getArgumentStackSize()));
+ }
+ }
} else {
- BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(
+ *AllocMBB, MBBI, DL, ScratchReg1, 0,
+ alignToARMConstant(ARMFI->getArgumentStackSize()));
+ }
}
// push {lr} - Save return address of this function.
@@ -2746,13 +3236,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the DWARF info about the change in stack as well as where to find the
// previous link register
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Call __morestack().
if (Thumb) {
@@ -2808,9 +3300,11 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Return from this function.
BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
@@ -2832,20 +3326,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Tell debuggers that r4 and r5 are now the same as they were in the
- // previous function, that they're the "Same Value".
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
- nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
- nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Tell debuggers that r4 and r5 are now the same as they were in the
+ // previous function, that they're the "Same Value".
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Organizing MBB lists
PostStackMBB->addSuccessor(&PrologueMBB);
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index 9822e2321bb4..16f2ce6bea6f 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -46,6 +46,7 @@ public:
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
bool hasFP(const MachineFunction &MF) const override;
+ bool isFPReserved(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0d201a67af46..9b26aac6c0b7 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -11,6 +11,8 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 98c8133282a2..e0e4ffd90e0e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1058,15 +1058,15 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
// The maximum alignment is equal to the memory size being referenced.
- unsigned MMOAlign = MemN->getAlignment();
+ llvm::Align MMOAlign = MemN->getAlign();
unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
- if (MMOAlign >= MemSize && MemSize > 1)
+ if (MMOAlign.value() >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
// the raw alignment value; it will be refined later based on the legal
// alignment operands for the intrinsic.
- Alignment = MemN->getAlignment();
+ Alignment = MemN->getAlign().value();
}
Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
@@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::XOR.
+/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
/// select_cc setgt X, -1, X, -X
/// select_cc setl[te] X, 0, -X, X
/// select_cc setlt X, 1, -X, X
/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue XORSrc0 = N->getOperand(0);
- SDValue XORSrc1 = N->getOperand(1);
+ SDValue SUBSrc0 = N->getOperand(0);
+ SDValue SUBSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
return false;
- if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+ if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
return false;
- SDValue ADDSrc0 = XORSrc0.getOperand(0);
- SDValue ADDSrc1 = XORSrc0.getOperand(1);
- SDValue SRASrc0 = XORSrc1.getOperand(0);
- SDValue SRASrc1 = XORSrc1.getOperand(1);
+ SDValue XORSrc0 = SUBSrc0.getOperand(0);
+ SDValue XORSrc1 = SUBSrc0.getOperand(1);
+ SDValue SRASrc0 = SUBSrc1.getOperand(0);
+ SDValue SRASrc1 = SUBSrc1.getOperand(1);
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
- XType.isInteger() && SRAConstant != nullptr &&
- Size == SRAConstant->getZExtValue()) {
+ if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
+ SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
return true;
}
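// Worked instance of the rewritten pattern (illustrative, X = -5 as i32):
//   Y = X >>s 31       ; arithmetic shift gives Y = 0xFFFFFFFF
//   (X ^ Y) - Y        ; (~X) - (-1) = 4 + 1 = 5 = |X|
// For X >= 0, Y = 0 and (X ^ 0) - 0 = X. The SUB is now the root of the
// pattern, which is why the combine fires on ISD::SUB instead of ISD::XOR.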
@@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::XOR:
- // Select special operations if XOR node forms integer ABS pattern
+ case ISD::SUB:
+ // Select special operations if SUB node forms integer ABS pattern
if (tryABSOp(N))
return;
// Other cases are autogenerated.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1b41427a1cab..85e32c08c74c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -273,6 +273,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
+ setOperationAction(ISD::AVGFLOORS, VT, Legal);
+ setOperationAction(ISD::AVGFLOORU, VT, Legal);
+ setOperationAction(ISD::AVGCEILS, VT, Legal);
+ setOperationAction(ISD::AVGCEILU, VT, Legal);
// No native support for these.
setOperationAction(ISD::UDIV, VT, Expand);
@@ -392,6 +396,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
@@ -476,7 +481,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
- !Subtarget->isTargetWatchOS()) {
+ !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
@@ -809,8 +814,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Combine low-overhead loop intrinsics so that we can lower i1 types.
if (Subtarget->hasLOB()) {
- setTargetDAGCombine(ISD::BRCOND);
- setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
}
if (Subtarget->hasNEON()) {
@@ -982,13 +986,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::FP_TO_UINT);
- setTargetDAGCombine(ISD::FDIV);
- setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
@@ -1002,32 +1001,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
- setTargetDAGCombine(ISD::BUILD_VECTOR);
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
- setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::VECREDUCE_ADD);
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(
+ {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
+ ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
+ ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
+ ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
}
if (Subtarget->hasMVEIntegerOps()) {
- setTargetDAGCombine(ISD::SMIN);
- setTargetDAGCombine(ISD::UMIN);
- setTargetDAGCombine(ISD::SMAX);
- setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
+ ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
+ ISD::SETCC});
}
if (Subtarget->hasMVEFloatOps()) {
setTargetDAGCombine(ISD::FADD);
@@ -1364,6 +1348,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
+ // Compute supported atomic widths.
+ if (Subtarget->isTargetLinux() ||
+ (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
+ // For targets where __sync_* routines are reliably available, we use them
+ // if necessary.
+ //
+ // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
+ // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
+ //
+ // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
+ // such targets should provide __sync_* routines, which use the ARM mode
+ // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
+ // encoding; see ARMISD::MEMBARRIER_MCR.)
+ setMaxAtomicSizeInBitsSupported(64);
+ } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+ // Cortex-M (besides Cortex-M0) have 32-bit atomics.
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ // We can't assume anything about other targets; just use libatomic
+ // routines.
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
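// Downstream effect of these widths (handled by the generic AtomicExpand
// pass, not by this code): atomics wider than the reported maximum become
// __atomic_* libcalls, e.g. with a 32-bit maximum a 64-bit load lowers to
//   %v = call i64 @__atomic_load_8(ptr %p, i32 5)   ; 5 = seq_cst
// while operations at or below the maximum keep native lowerings.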
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
@@ -1545,12 +1552,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(
+ {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR});
if (Subtarget->hasMVEIntegerOps())
setTargetDAGCombine(ISD::VSELECT);
@@ -1559,6 +1562,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::SHL);
+ // Attempt to lower smin/smax to ssat/usat
+ if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
+ Subtarget->isThumb2()) {
+ setTargetDAGCombine({ISD::SMIN, ISD::SMAX});
+ }
setStackPointerRegisterToSaveRestore(ARM::SP);
@@ -1901,13 +1909,14 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
- unsigned &PrefAlign) const {
+ Align &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
// cycle faster than 4-byte aligned LDM.
- PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ PrefAlign =
+ (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
return true;
}
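// Intended effect (a sketch of the comment's reasoning): for a call such as
// memcpy(dst, src, 100) on a v6+ A/R-class core, raising PrefAlign to 8
// lets the expansion use the 8-byte-aligned LDM sequences the comment above
// notes are typically one cycle faster; M-class and pre-v6 cores keep the
// 4-byte preference.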
@@ -2326,7 +2335,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Lower 'returns_twice' calls to a pseudo-instruction.
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
- !Subtarget->getNoBTIAtReturnTwice())
+ !Subtarget->noBTIAtReturnTwice())
GuardWithBTI = AFI->branchTargetEnforcement();
// Determine whether this is a non-secure function call.
@@ -2778,25 +2787,23 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- if (!isTailCall) {
- const uint32_t *Mask;
- const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
- Mask = ARI->getCallPreservedMask(MF, CallConv);
- }
- } else
+ const uint32_t *Mask;
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
Mask = ARI->getCallPreservedMask(MF, CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
- }
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -4379,7 +4386,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
bool ARMTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
@@ -4397,7 +4404,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts(
SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
@@ -5547,7 +5554,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getPointerInfo(), Ld->getAlign(),
Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
@@ -5567,14 +5574,14 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue Ptr = Ld->getBasePtr();
RetVal1 =
DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- Ld->getAlignment(), Ld->getMemOperand()->getFlags());
+ Ld->getAlign(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
- unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
- Ld->getPointerInfo().getWithOffset(4), NewAlign,
+ Ld->getPointerInfo().getWithOffset(4),
+ commonAlignment(Ld->getAlign(), 4),
Ld->getMemOperand()->getFlags());
return;
}
@@ -5801,8 +5808,7 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- const bool HasFullFP16 =
- static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+ const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
EVT NewTy;
const EVT OpTy = Op.getOperand(0).getValueType();
@@ -5912,8 +5918,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(0).getValueType() == MVT::v8i16) &&
"Invalid type for custom lowering!");
- const bool HasFullFP16 =
- static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+ const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
EVT DestVecType;
if (VT == MVT::v4f32)
@@ -9359,15 +9364,15 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(),
- LD->getAlignment(), LD->getMemOperand()->getFlags());
+ LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
+ LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a zext/sext node because LowerMUL is also run during normal
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(),
+ LD->getMemoryVT(), LD->getAlign(),
LD->getMemOperand()->getFlags());
}
@@ -9876,7 +9881,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (N->getOpcode() != ISD::SDIV)
return SDValue();
- const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &ST = DAG.getSubtarget<ARMSubtarget>();
const bool MinSize = ST.hasMinSize();
const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
: ST.hasDivideInARMMode();
@@ -10311,6 +10316,15 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, dl);
}
+SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ return DAG.getFrameIndex(FI, VT);
+}
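// Usage note (an assumption, based on how SPONENTRY is used on other
// targets such as AArch64 for Windows setjmp): the fixed object at offset 0
// resolves after frame-index elimination to the value SP had on entry to
// the function, which is what @llvm.sponentry must return.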
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10424,6 +10438,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
+ case ISD::SPONENTRY:
+ return LowerSPONENTRY(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -10509,9 +10525,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::INTRINSIC_WO_CHAIN:
return ReplaceLongIntrinsic(N, Results, DAG);
- case ISD::ABS:
- lowerABS(N, Results, DAG);
- return ;
case ISD::LOAD:
LowerLOAD(N, Results, DAG);
break;
@@ -12170,7 +12183,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (Subtarget->isThumb1Only()) {
for (unsigned c = MCID->getNumOperands() - 4; c--;) {
MI.addOperand(MI.getOperand(1));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
}
// Restore the ties
@@ -12208,7 +12221,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
break;
}
}
@@ -14775,14 +14788,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 =
DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
- LD->getAlignment(), LD->getMemOperand()->getFlags());
+ LD->getAlign(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
LD->getPointerInfo().getWithOffset(4),
- std::min(4U, LD->getAlignment()),
+ commonAlignment(LD->getAlign(), 4),
LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -15352,6 +15365,10 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
case ISD::MULHU:
case ISD::ABDS:
case ISD::ABDU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
break;
default:
return SDValue();
@@ -15721,7 +15738,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// Now, create a _UPD node, taking care of not breaking alignment.
EVT AlignedVecTy = VecTy;
- unsigned Alignment = MemN->getAlignment();
+ Align Alignment = MemN->getAlign();
// If this is a less-than-standard-aligned load/store, change the type to
// match the standard alignment.
@@ -15738,10 +15755,8 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// memory type to match the explicit alignment. That way, we don't
// generate non-standard-aligned ARMISD::VLDx nodes.
if (isa<LSBaseSDNode>(N)) {
- if (Alignment == 0)
- Alignment = 1;
- if (Alignment < VecTy.getScalarSizeInBits() / 8) {
- MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
assert(!isLaneOp && "Unexpected generic load/store lane.");
unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
@@ -15754,7 +15769,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// alignment of the memory type.
// Intrinsics, however, always get an explicit alignment, set to the
// alignment of the MMO.
- Alignment = 1;
+ Alignment = Align(1);
}
// Create the new updating load/store node.
@@ -15787,7 +15802,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
}
// For all node types, the alignment operand is always the last one.
- Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+ Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
// If this is a non-standard-aligned STORE, the penultimate operand is the
// stored value. Bitcast it to the aligned type.
@@ -15965,10 +15980,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
// Try to fold with other users. Non-constant updates are considered
// first, and constant updates are sorted to not break a sequence of
// strided accesses (if there is any).
- std::sort(BaseUpdates.begin(), BaseUpdates.end(),
- [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) {
- return LHS.ConstInc < RHS.ConstInc;
- });
+ std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
+ [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
+ return LHS.ConstInc < RHS.ConstInc;
+ });
for (BaseUpdateUser &User : BaseUpdates) {
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
return SDValue();
@@ -16258,7 +16273,7 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
if (LD && Op.hasOneUse() && LD->isUnindexed() &&
LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
- DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32)};
+ DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue VLDDup =
DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
@@ -16360,7 +16375,7 @@ static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
ShuffWide, DAG.getIntPtrConstant(I, DL));
SDValue Ch =
DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
+ St->getAlign(), St->getMemOperand()->getFlags());
BasePtr =
DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
Chains.push_back(Ch);
@@ -16608,7 +16623,7 @@ static SDValue PerformSTORECombine(SDNode *N,
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getAlign(),
St->getMemOperand()->getFlags(), St->getAAInfo());
}
@@ -16690,14 +16705,16 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // The identity element for a fadd is -0.0, which these VMOV's represent.
- auto isNegativeZeroSplat = [&](SDValue Op) {
+ // The identity element for an fadd is -0.0 (or +0.0 when the nsz flag is
+ // set), which these VMOVs represent.
+ auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
if (Op.getOpcode() != ISD::BITCAST ||
Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
return false;
- if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664)
+ uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
+ if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
return true;
- if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688)
+ if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
return true;
return false;
};
@@ -16705,12 +16722,17 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
std::swap(Op0, Op1);
- if (Op1.getOpcode() != ISD::VSELECT ||
- !isNegativeZeroSplat(Op1.getOperand(2)))
+ if (Op1.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ SDNodeFlags FaddFlags = N->getFlags();
+ bool NSZ = FaddFlags.hasNoSignedZeros();
+ if (!isIdentitySplat(Op1.getOperand(2), NSZ))
return SDValue();
+
SDValue FAdd =
- DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags());
- return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0);
+ DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
+ return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
}
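// The identity being exploited (illustrative): with S = vselect(C, Y, -0.0),
//   fadd(X, S) == vselect(C, fadd(X, Y), X)
// since X + (-0.0) == X for every X. A +0.0 splat is only an identity under
// nsz, because (-0.0) + (+0.0) yields +0.0; that is what gates the
// ImmVal == 0 case above.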
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
@@ -17060,13 +17082,10 @@ static SDValue PerformVMOVNCombine(SDNode *N,
IsTop ? Op1DemandedElts
: APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
return SDValue(N, 0);
- if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
@@ -17082,10 +17101,8 @@ static SDValue PerformVQMOVNCombine(SDNode *N,
APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
}
@@ -17390,7 +17407,7 @@ static SDValue PerformShiftCombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
- if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ if (ST->hasMVEIntegerOps())
return SDValue();
int64_t Cnt;
@@ -17556,12 +17573,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
+// constant bounds.
+static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
+ !Subtarget->isThumb2())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+
+ if (VT != MVT::i32 ||
+ (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)))
+ return SDValue();
+
+ SDValue Min = Op;
+ SDValue Max = Op0;
+ SDValue Input = Op0.getOperand(0);
+ if (Min.getOpcode() == ISD::SMAX)
+ std::swap(Min, Max);
+
+ APInt MinC = Min.getConstantOperandAPInt(1);
+ APInt MaxC = Max.getConstantOperandAPInt(1);
+
+ if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
+ !(MinC + 1).isPowerOf2())
+ return SDValue();
+
+ SDLoc DL(Op);
+ if (MinC == ~MaxC)
+ return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+ if (MaxC == 0)
+ return DAG.getNode(ARMISD::USAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+
+ return SDValue();
+}
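// Worked instances (illustrative): smin(smax(x, -128), 127) gives
// MinC = 127, MaxC = -128; MinC + 1 = 128 is a power of two and
// MinC == ~MaxC, so the clamp becomes an SSAT saturating x to the signed
// 8-bit range [-128, 127]. Likewise smin(smax(x, 0), 255) has MaxC == 0
// and becomes a USAT to [0, 255].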
+
/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
/// saturates.
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
+
+ if (VT == MVT::i32)
+ return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
+
if (!ST->hasMVEIntegerOps())
return SDValue();
@@ -19354,8 +19416,8 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Return false to prevent folding
// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
// if the folding leads to worse code.
-bool ARMTargetLowering::isMulAddWithConstProfitable(
- const SDValue &AddNode, const SDValue &ConstNode) const {
+bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 32)
@@ -20537,38 +20599,6 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
-void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
- MVT HalfT = MVT::i32;
- SDLoc dl(N);
- SDValue Hi, Lo, Tmp;
-
- if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
- !isOperationLegalOrCustom(ISD::UADDO, HalfT))
- return ;
-
- unsigned OpTypeBits = HalfT.getScalarSizeInBits();
- SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
-
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(0, dl, HalfT));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(1, dl, HalfT));
-
- Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
- DAG.getConstant(OpTypeBits - 1, dl,
- getShiftAmountTy(HalfT, DAG.getDataLayout())));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
- SDValue(Lo.getNode(), 1));
- Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
- Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
-
- Results.push_back(Lo);
- Results.push_back(Hi);
-}
-
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -20787,24 +20817,24 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Type *ValTy = I.getParamElementType(0);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Type *ValTy = I.getParamElementType(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -20932,9 +20962,19 @@ Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
-bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+TargetLoweringBase::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ bool has64BitAtomicStore;
+ if (Subtarget->isMClass())
+ has64BitAtomicStore = false;
+ else if (Subtarget->isThumb())
+ has64BitAtomicStore = Subtarget->hasV7Ops();
+ else
+ has64BitAtomicStore = Subtarget->hasV6Ops();
+
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return (Size == 64) && !Subtarget->isMClass();
+ return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
+ : AtomicExpansionKind::None;
}
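// Consequence of returning Expand here (the rewrite itself lives in the
// generic AtomicExpand pass): a 64-bit atomic store is turned into an
// atomic exchange, which then lowers to an ldrexd/strexd retry loop on
// cores that have those instructions; None leaves the plain store, relying
// on the atomic-width limits set in the constructor above for the rest.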
// Loads and stores less than 64-bits are already atomic; ones above that
@@ -20946,9 +20986,17 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// sections A8.8.72-74 LDRD)
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ bool has64BitAtomicLoad;
+ if (Subtarget->isMClass())
+ has64BitAtomicLoad = false;
+ else if (Subtarget->isThumb())
+ has64BitAtomicLoad = Subtarget->hasV7Ops();
+ else
+ has64BitAtomicLoad = Subtarget->hasV6Ops();
+
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
- : AtomicExpansionKind::None;
+ return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldrex/strex up to 32 bits,
@@ -20958,19 +21006,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
- // At -O0, fast-regalloc cannot cope with the live vregs necessary to
- // implement atomicrmw without spilling. If the target address is also on the
- // stack and close enough to the spill slot, this can lead to a situation
- // where the monitor always gets cleared and the atomic operation can never
- // succeed. So at -O0 lower this operation to a CAS loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
- return AtomicExpansionKind::CmpXChg;
-
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
- ? AtomicExpansionKind::LLSC
- : AtomicExpansionKind::None;
+ bool hasAtomicRMW;
+ if (Subtarget->isMClass())
+ hasAtomicRMW = Subtarget->hasV8MBaselineOps();
+ else if (Subtarget->isThumb())
+ hasAtomicRMW = Subtarget->hasV7Ops();
+ else
+ hasAtomicRMW = Subtarget->hasV6Ops();
+ if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on
+ // the stack and close enough to the spill slot, this can lead to a
+ // situation where the monitor always gets cleared and the atomic operation
+ // can never succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+ return AtomicExpansionKind::LLSC;
+ }
+ return AtomicExpansionKind::None;
}
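// Shape of the LLSC expansion being chosen (illustrative, ARM mode):
// 1:  ldrex r1, [r0]       ; load-exclusive sets the monitor
//     add   r2, r1, #1     ; e.g. atomicrmw add
//     strex r3, r2, [r0]   ; r3 = 0 on success
//     cmp   r3, #0
//     bne   1b
// At -O0, spills and reloads inserted between ldrex and strex can land near
// the target address and clear the monitor on every iteration, hence the
// CAS-loop (CmpXChg) fallback there.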
// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
@@ -20983,8 +21037,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
- bool HasAtomicCmpXchg =
- !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ bool HasAtomicCmpXchg;
+ if (Subtarget->isMClass())
+ HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
+ else if (Subtarget->isThumb())
+ HasAtomicCmpXchg = Subtarget->hasV7Ops();
+ else
+ HasAtomicCmpXchg = Subtarget->hasV6Ops();
if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
@@ -21099,8 +21158,11 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValueTy);
+ CallInst *CI = Builder.CreateCall(Ldrex, Addr);
+ CI->addParamAttr(
+ 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
+ return Builder.CreateTruncOrBitCast(CI, ValueTy);
}
void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@@ -21138,10 +21200,13 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateCall(
+ CallInst *CI = Builder.CreateCall(
Strex, {Builder.CreateZExtOrBitCast(
Val, Strex->getFunctionType()->getParamType(0)),
Addr});
+ CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
+ Val->getType()));
+ return CI;
}
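// Resulting IR (illustrative, opaque-pointer syntax): both intrinsic calls
// now carry an elementtype attribute on the pointer argument, e.g.
//   %old  = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %addr)
//   %fail = call i32 @llvm.arm.strex.p0(i32 %new, ptr elementtype(i32) %addr)
// so the accessed memory type survives the move to opaque pointers.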
@@ -21273,7 +21338,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
SmallVector<Value *, 2> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ Ops.push_back(Builder.getInt32(LI->getAlign().value()));
return Builder.CreateCall(VldnFunc, Ops, "vldN");
} else {
@@ -21443,7 +21508,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
append_range(Ops, Shuffles);
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Ops.push_back(Builder.getInt32(SI->getAlign().value()));
Builder.CreateCall(VstNFunc, Ops);
} else {
assert((Factor == 2 || Factor == 4) &&
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 1c5f8389f57c..10f60ab93ae3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -581,7 +581,7 @@ class VectorType;
getRegClassFor(MVT VT, bool isDivergent = false) const override;
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
- unsigned &PrefAlign) const override;
+ Align &PrefAlign) const override;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
@@ -665,7 +665,8 @@ class VectorType;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
@@ -713,8 +714,8 @@ class VectorType;
Align Alignment,
const DataLayout &DL) const;
- bool isMulAddWithConstProfitable(const SDValue &AddNode,
- const SDValue &ConstNode) const override;
+ bool isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const override;
bool alignLoopsWithOptSize() const override;
@@ -845,8 +846,7 @@ class VectorType;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
- void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
+ SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index ff5afd787c82..c9a2d21bec53 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1589,9 +1589,9 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz,
}
class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, string cstr, list<dag> pattern>
: VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
+ opc, asm, cstr, pattern> {
let PostEncoderMethod = "VFPThumb2PostEncoder";
}
@@ -1751,8 +1751,8 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
// Double precision, unary
class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ string asm, string cstr, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
// Instruction operands.
bits<5> Dd;
bits<5> Dm;
@@ -1804,7 +1804,7 @@ class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Dd;
bits<5> Dn;
@@ -1862,8 +1862,8 @@ class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
// Single precision, unary, predicated
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ string asm, string cstr, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
// Instruction operands.
bits<5> Sd;
bits<5> Sm;
@@ -1916,14 +1916,14 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
: ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
- pattern> {
+ "", pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
}
// Single precision, binary
class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Sd;
bits<5> Sn;
@@ -2000,7 +2000,7 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, "", pattern> {
list<Predicate> Predicates = [HasFullFP16];
// Instruction operands.
@@ -2056,7 +2056,7 @@ class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
// Half precision, binary
class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
list<Predicate> Predicates = [HasFullFP16];
// Instruction operands.
@@ -2116,7 +2116,7 @@ class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, "", pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
@@ -2149,7 +2149,7 @@ class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, f, itin, opc, asm, "", pattern> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{4} = 1;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 32a3911d3369..88bb74d1fc54 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5129,6 +5129,7 @@ let hasNoSchedulingInfo = 1 in
def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary,
"tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> {
let Inst{31-0} = 0xe320f012;
+ let DecoderMethod = "DecodeTSBInstruction";
}
}
@@ -6387,7 +6388,7 @@ def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
(RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
-def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg), 0>,
Requires<[IsARM, NoV6]>;
// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but
@@ -6415,8 +6416,7 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
- ComplexDeprecationPredicate<"IT">;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
@@ -6476,3 +6476,24 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
let AsmString = "@ COMPILER BARRIER";
let hasNoSchedulingInfo = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+ def SEH_StackAlloc : PseudoInst<(outs), (ins i32imm:$size, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveRegs : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_SaveRegs_Ret : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveSP : PseudoInst<(outs), (ins i32imm:$reg), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveFRegs : PseudoInst<(outs), (ins i32imm:$first, i32imm:$last), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_SaveLR : PseudoInst<(outs), (ins i32imm:$offst), NoItinerary, []>, Sched<[]>;
+ def SEH_Nop : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_Nop_Ret : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_PrologEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def SEH_EpilogStart : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
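
Note: these pseudos thread Windows SEH unwind information through the backend;
frame lowering interleaves them with the real prologue/epilogue instructions,
and the AsmPrinter later turns them into .seh_* directives. A hedged sketch of
emitting one (BuildMI and the SEH_StackAlloc operands match the def above; the
helper function itself is invented for illustration):

    // Illustrative helper: record a stack allocation for Windows unwinding,
    // inserted right after the 'sub sp, sp, #Size' it describes.
    static void emitSEHStackAlloc(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator Pos,
                                  const TargetInstrInfo &TII,
                                  const DebugLoc &DL, unsigned Size,
                                  bool Wide) {
      BuildMI(MBB, Pos, DL, TII.get(ARM::SEH_StackAlloc))
          .addImm(Size)  // $size: bytes subtracted from sp
          .addImm(Wide); // $wide: whether the 32-bit Thumb2 encoding was used
    }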
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1ae0354ffc37..15c33014e988 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2192,36 +2192,29 @@ def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
return N->getFlags().hasNoSignedWrap();
}]>;
-multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI, SDNode Op,
+ SDNode unpred_op, Intrinsic PredInt> {
def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
let Predicates = [HasMVEInt] in {
- // Unpredicated rounding add-with-divide-by-two
+ // Unpredicated rounding add-with-divide-by-two intrinsic
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
(i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated add-with-divide-by-two
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
- (VTI.Vec MQPR:$inactive)))>;
}
}
-multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
- : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
+multiclass MVE_VRHADD<MVEVectorVTInfo VTI, SDNode rhadd>
+ : MVE_VRHADD_m<VTI, rhadd, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
-defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>;
-defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
-defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
-defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
-defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
-defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
+defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8, avgceils>;
+defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16, avgceils>;
+defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32, avgceils>;
+defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8, avgceilu>;
+defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16, avgceilu>;
+defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32, avgceilu>;
// Rounding Halving Add performs the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
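
Note: mapping VRHADD onto the generic avgceils/avgceilu nodes works because
the instruction is exactly a rounding average computed with one extra internal
bit. A standalone scalar reference, widening instead of relying on that extra
bit (VHADD, further down, is the truncating avgfloors/avgflooru counterpart):

    #include <cstdint>

    // vrhadd.s8: rounding halving add, (a + b + 1) >> 1 with no intermediate
    // overflow -- the int16_t widening stands in for the hardware's extra bit.
    int8_t vrhadd_s8(int8_t a, int8_t b) {
      return static_cast<int8_t>((int16_t(a) + int16_t(b) + 1) >> 1);
    }

    // vhadd.s8: truncating halving add, (a + b) >> 1.
    int8_t vhadd_s8(int8_t a, int8_t b) {
      return static_cast<int8_t>((int16_t(a) + int16_t(b)) >> 1);
    }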
@@ -2303,11 +2296,12 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
-multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int, PatFrag add_op,
+multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
+ SDNode unpred_op, Intrinsic PredInt, PatFrag add_op,
SDNode shift_op> {
def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
let Predicates = [HasMVEInt] in {
// Unpredicated add-and-divide-by-two
@@ -2316,30 +2310,23 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
(Inst MQPR:$Qm, MQPR:$Qn)>;
-
- // Predicated add-and-divide-by-two
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
- (VTI.Vec MQPR:$inactive)))>;
}
}
-multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
- : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op, PatFrag add_op, SDNode shift_op>
+ : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
shift_op>;
// Halving add/sub perform the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
// modelling that here with these patterns, but we're using no wrap forms of
// add/sub to ensure that the extra bit of information is not needed.
-defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>;
-defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>;
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru, addnuw, ARMvshruImm>;
multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op,
@@ -5372,10 +5359,10 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
let validForTailPredication = 1;
}
-multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op,
- SDNode shift_op> {
+multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, SDNode Op,
+ Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op, PatFrag shift_op> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>;
+ defm : MVE_TwoOpPatternDup<VTI, Op, pred_int, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
VTI, unpred_int, pred_int, 1, 1>;
defvar Inst = !cast<Instruction>(NAME);
@@ -5386,20 +5373,20 @@ multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
}
}
-multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
- MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd, int_arm_mve_hadd_predicated,
- add_op, shift_op>;
+multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op, SDNode Op> :
+ MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, Op, int_arm_mve_vhadd,
+ int_arm_mve_hadd_predicated, add_op, shift_op>;
multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
- MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub, int_arm_mve_hsub_predicated,
- add_op, shift_op>;
-
-defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm>;
-defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm>;
-defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm>;
+ MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, null_frag, int_arm_mve_vhsub,
+ int_arm_mve_hsub_predicated, add_op, shift_op>;
+
+defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm, avgflooru>;
+defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm, avgflooru>;
+defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm, avgflooru>;
defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8, subnsw, ARMvshrsImm>;
defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16, subnsw, ARMvshrsImm>;
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 357aa6d062e9..cdad8e106de6 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
v4f32, v4i16, int_arm_neon_vcvthf2fp>,
Requires<[HasNEON, HasFP16]>;
+def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>;
+def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>;
+
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f80b9a5053f7..20d8a45aaf49 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3561,6 +3561,7 @@ let hasNoSchedulingInfo = 1 in
def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary,
"tsb", "\t$opt", []>, Requires<[IsThumb, HasV8_4a]> {
let Inst{31-0} = 0xf3af8012;
+ let DecoderMethod = "DecodeTSBInstruction";
}
}
@@ -3950,6 +3951,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// Tail calls. The MachO version of thumb tail calls uses a t2 branch, so
// it goes here.
+// Windows SEH unwinding also needs a strict t2 branch for tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Uses = [SP] in
@@ -3957,15 +3959,14 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins thumb_br_target:$dst, pred:$p),
4, IIC_Br, [],
(t2B thumb_br_target:$dst, pred:$p)>,
- Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>;
+ Requires<[IsThumb2]>, Sched<[WriteBr]>;
}
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, 2, IIC_iALUx,
- "it$mask\t$cc", "", []>,
- ComplexDeprecationPredicate<"IT"> {
+ "it$mask\t$cc", "", []> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index dc5f1b92a6c2..b233555d5225 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -584,12 +584,12 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
- IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
[(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
- IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
[(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -603,12 +603,12 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
- IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
- IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -627,7 +627,7 @@ def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
@@ -647,7 +647,7 @@ def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
- IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
[(arm_cmpfpe0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -655,7 +655,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
- IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
[(arm_cmpfpe0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -675,7 +675,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
- IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
[(arm_cmpfp0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -683,7 +683,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
- IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
[(arm_cmpfp0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -704,7 +704,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "",
[(set DPR:$Dd, (fpextend SPR:$Sm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
@@ -723,7 +723,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
- IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "",
[(set SPR:$Sd, (fpround DPR:$Dm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
@@ -749,7 +749,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half, single and double-precision.
let hasSideEffects = 0 in
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
@@ -760,26 +760,30 @@ def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0 in
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
def : FP16Pat<(f16 (fpround SPR:$Sm)),
- (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+ (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+ (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
- (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+ (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
- (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+ (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
let hasSideEffects = 0 in
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
@@ -792,22 +796,26 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))),
(SSubReg_f16_reg imm_odd:$lane)))>;
let hasSideEffects = 0 in
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
- (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+ (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
- (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+ (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
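
Note: the new "$Sd = $Sda" tie (and the EXTRACT_SUBREG feeding the patterns)
exists because VCVTB/VCVTT write only one f16 half of the destination S
register and must preserve the other half; without the tie the register
allocator would treat the untouched half as undefined. In plain C++ the two
forms behave like this sketch, where f32_to_f16_bits is a stand-in for the
real conversion:

    #include <cstdint>

    // Stand-in for the IEEE f32->f16 conversion; only the bit layout matters.
    uint16_t f32_to_f16_bits(float f);

    // vcvtb.f16.f32 Sd, Sm: replace the low half of Sd, keep the high half.
    uint32_t vcvtb_f16_f32(uint32_t Sd_old, float Sm) {
      return (Sd_old & 0xFFFF0000u) | f32_to_f16_bits(Sm);
    }

    // vcvtt.f16.f32 Sd, Sm: replace the high half of Sd, keep the low half.
    uint32_t vcvtt_f16_f32(uint32_t Sd_old, float Sm) {
      return (Sd_old & 0x0000FFFFu) | (uint32_t(f32_to_f16_bits(Sm)) << 16);
    }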
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
+ NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFPARMv8, HasDPVFP]>,
Sched<[WriteFPCVT]> {
@@ -829,8 +837,8 @@ def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
- (outs SPR:$Sd), (ins DPR:$Dm),
- NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
+ (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+ NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
@@ -847,15 +855,15 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
- (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+ (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
- (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+ (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
+ NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "",
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -868,8 +876,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
}
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
- (outs SPR:$Sd), (ins DPR:$Dm),
- NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
+ (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+ NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
@@ -990,7 +998,7 @@ defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
@@ -1019,7 +1027,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
+ NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "",
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
Requires<[HasFPARMv8]> {
let Inst{7} = op2;
@@ -1027,7 +1035,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
+ NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "",
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
@@ -1094,13 +1102,13 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "",
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
Sched<[WriteFPSQRT32]>;
@@ -1113,12 +1121,12 @@ let hasSideEffects = 0 in {
let isMoveReg = 1 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>,
Requires<[HasFPRegs64]>;
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>,
Requires<[HasFPRegs]>;
} // isMoveReg
@@ -1984,7 +1992,7 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
class BF16_VCVT<string opc, bits<2> op7_6>
: VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
VFPUnaryFrm, NoItinerary,
- opc, ".bf16.f32\t$Sd, $Sm", []>,
+ opc, ".bf16.f32\t$Sd, $Sm", "", []>,
RegConstraint<"$dst = $Sd">,
Requires<[HasBF16]>,
Sched<[]> {
@@ -2440,7 +2448,7 @@ def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
list<dag> pattern>:
- VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
// Instruction operand.
bits<4> Rt;
@@ -2525,7 +2533,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
list<dag> pattern>:
- VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
// Instruction operand.
bits<4> Rt;
@@ -2598,7 +2606,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
- "vmov", ".f64\t$Dd, $imm",
+ "vmov", ".f64\t$Dd, $imm", "",
[(set DPR:$Dd, vfp_f64imm:$imm)]>,
Requires<[HasVFP3,HasDPVFP]> {
bits<5> Dd;
@@ -2617,7 +2625,7 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_fpUNA32,
- "vmov", ".f32\t$Sd, $imm",
+ "vmov", ".f32\t$Sd, $imm", "",
[(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
bits<5> Sd;
bits<8> imm;
@@ -2635,7 +2643,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
VFPMiscFrm, IIC_fpUNA16,
- "vmov", ".f16\t$Sd, $imm",
+ "vmov", ".f16\t$Sd, $imm", "",
[(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>,
Requires<[HasFullFP16]> {
bits<5> Sd;
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 188b5562cac9..1c44893581f9 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -624,12 +624,12 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
bool UseMovt = STI.useMovt();
- unsigned Size = TM.getPointerSize(0);
+ LLT PtrTy = MRI.getType(MIB->getOperand(0).getReg());
const Align Alignment(4);
- auto addOpsForConstantPoolLoad = [&MF, Alignment,
- Size](MachineInstrBuilder &MIB,
- const GlobalValue *GV, bool IsSBREL) {
+ auto addOpsForConstantPoolLoad = [&MF, Alignment, PtrTy](
+ MachineInstrBuilder &MIB,
+ const GlobalValue *GV, bool IsSBREL) {
assert((MIB->getOpcode() == ARM::LDRi12 ||
MIB->getOpcode() == ARM::t2LDRpci) &&
"Unsupported instruction");
@@ -644,7 +644,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- Size, Alignment));
+ PtrTy, Alignment));
if (MIB->getOpcode() == ARM::LDRi12)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -733,7 +733,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
// Add the offset to the SB register.
MIB->setDesc(TII.get(Opcodes.ADDrr));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB.addReg(ARM::R9) // FIXME: don't hardcode R9
.addReg(Offset)
.add(predOps(ARMCC::AL))
@@ -748,7 +748,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
} else {
// Load the global's address from the constant pool.
MIB->setDesc(TII.get(Opcodes.ConstPoolLoad));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false);
}
} else if (STI.isTargetMachO()) {
@@ -997,7 +997,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) {
auto CPIndex =
ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment);
MIB->setDesc(TII.get(LoadOpcode));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
.addMemOperand(
MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index de88ffab1c28..52b6b6f3bcf7 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index ef5fc12feb54..0a38f5633ae3 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -2108,7 +2110,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return false;
MF = &Fn;
- STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ STI = &Fn.getSubtarget<ARMSubtarget>();
TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
@@ -2199,7 +2201,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return false;
TD = &Fn.getDataLayout();
- STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ STI = &Fn.getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
@@ -2894,10 +2896,12 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
<< Base.virtRegIndex() << "\n");
- // Make sure that Increment has no uses before BaseAccess.
+ // Make sure that Increment has no uses before BaseAccess that are not PHI
+ // uses.
for (MachineInstr &Use :
MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
- if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+ if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
+ !DT->dominates(BaseAccess, &Use))) {
LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index f822672c4477..aa739db44da2 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -59,8 +59,10 @@
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
@@ -1297,7 +1299,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
- const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
+ const ARMSubtarget &ST = mf.getSubtarget<ARMSubtarget>();
if (!ST.hasLOB())
return false;
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 308d5e7889f2..9596e88deb18 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -73,3 +73,10 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
std::tie(SignReturnAddress, SignReturnAddressAll) =
GetSignReturnAddress(MF.getFunction());
}
+
+MachineFunctionInfo *
+ARMFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+ &Src2DstMBB) const {
+ return DestMF.cloneInfo<ARMFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index d8d937055d23..e906fea1a810 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of the callee-saved register
/// spill areas.
unsigned FPCXTSaveSize = 0;
+ unsigned FRSaveSize = 0;
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
@@ -158,6 +159,11 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF);
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
@@ -198,12 +204,14 @@ public:
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
+ unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
+ void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 46baf8930939..6effd84041b5 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -459,6 +459,10 @@ bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) {
if (ValidLHS && ValidRHS)
return true;
+ // Ensure we don't add the root as the incoming accumulator.
+ if (R.getRoot() == I)
+ return false;
+
return R.InsertAcc(I);
}
case Instruction::Mul: {
@@ -535,6 +539,7 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
InsertParallelMACs(R);
Changed = true;
AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB);
}
}
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 1a7f10a13ed3..527fefbd291e 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -13,9 +13,9 @@
#include "ARMRegisterBankInfo.h"
#include "ARMInstrInfo.h" // For the register classes
#include "ARMSubtarget.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
@@ -129,8 +129,7 @@ static void checkValueMappings() {
} // end namespace arm
} // end namespace llvm
-ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
- : ARMGenRegisterBankInfo() {
+ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) {
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
// done only once. Indeed the table of all register banks
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
index b8aff65a967e..c56134aab38c 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "ARMGenRegisterBank.inc"
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index ff4647dd46fd..d1d30e614fc9 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -15,4 +15,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo() {}
+ARMRegisterInfo::ARMRegisterInfo() = default;
diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
index 332acb453124..fa80b75484e1 100644
--- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -322,8 +322,8 @@ MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump(
assert(ImpSPOpIdx != -1);
int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
- BL->RemoveOperand(FirstOpIdxToRemove);
- BL->RemoveOperand(SecondOpIdxToRemove);
+ BL->removeOperand(FirstOpIdxToRemove);
+ BL->removeOperand(SecondOpIdxToRemove);
// Now copy over the implicit operands from the original IndirectCall
BL->copyImplicitOps(MF, IndirectCall);
MF.moveCallSiteInfo(&IndirectCall, BL);
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 12d4ad889897..379521752261 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -296,7 +296,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, Align Alignment, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
const ARMSubtarget &Subtarget =
@@ -314,6 +314,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
DAG.getZExtOrTrunc(Size, dl, MVT::i32));
}
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
- Alignment.value(), RTLIB::MEMSET);
+ if (!AlwaysInline)
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMSET);
+
+ return SDValue();
}
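
Note: the new AlwaysInline flag is the caller saying a libcall is forbidden;
by returning an empty SDValue in that case, the target declines and generic
lowering expands the memset into inline stores instead. The contract, roughly
(a sketch with illustrative variable names, not the actual DAG code):

    // Caller-side contract: an empty SDValue means "target declined, expand
    // inline"; otherwise Res is the lowered libcall or loop.
    SDValue Res = TSI.EmitTargetCodeForMemset(DAG, dl, Chain, Dst, Src, Size,
                                              Alignment, isVolatile,
                                              /*AlwaysInline=*/true,
                                              DstPtrInfo);
    if (!Res.getNode()) {
      // Generic path: emit an inline sequence of stores covering Size bytes.
    }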
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 7aa831c09248..ffa8b5049351 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -55,6 +55,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
SDValue Op3, Align Alignment, bool isVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 32160b109343..79244f634ce3 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -52,19 +53,15 @@ UseFusedMulOps("arm-use-mulops",
enum ITMode {
DefaultIT,
- RestrictedIT,
- NoRestrictedIT
+ RestrictedIT
};
static cl::opt<ITMode>
-IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
- cl::ZeroOrMore,
- cl::values(clEnumValN(DefaultIT, "arm-default-it",
- "Generate IT block based on arch"),
- clEnumValN(RestrictedIT, "arm-restrict-it",
- "Disallow deprecated IT based on ARMv8"),
- clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
- "Allow IT blocks based on ARMv7")));
+ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate any type of IT block"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow complex IT blocks")));
/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
/// currently supported (for testing only).
@@ -237,21 +234,18 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() && !hasMinSize();
+ RestrictIT = false;
break;
case RestrictedIT:
RestrictIT = true;
break;
- case NoRestrictedIT:
- RestrictIT = false;
- break;
}
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
const FeatureBitset &Bits = getFeatureBits();
if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
(Options.UnsafeFPMath || isTargetDarwin()))
- UseNEONForSinglePrecisionFP = true;
+ HasNEONForFP = true;
if (isRWPI())
ReserveR9 = true;
@@ -399,6 +393,14 @@ bool ARMSubtarget::enableSubRegLiveness() const {
return hasMVEIntegerOps();
}
+bool ARMSubtarget::enableMachinePipeliner() const {
+ // Enable the MachinePipeliner before register allocation for subtargets
+ // with the use-mipipeliner feature.
+ return getSchedModel().hasInstrSchedModel() && useMachinePipeliner();
+}
+
+bool ARMSubtarget::useDFAforSMS() const { return false; }
+
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
if (enableMachineScheduler())
@@ -417,8 +419,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const {
return !isThumb1Only();
}
-bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-
bool ARMSubtarget::useStride4VFPs() const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
@@ -491,3 +491,12 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
return isThumb2() && MF.getFunction().hasMinSize() &&
ARM::GPRRegClass.contains(PhysReg);
}
+
+bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
+ !F.needsUnwindTableEntry())
+ return false;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
+}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 7cbdc014299f..460ec62d5a33 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -25,8 +25,8 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
@@ -150,6 +150,11 @@ public:
};
protected:
+// Bool members corresponding to the SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "ARMGenSubtargetInfo.inc"
+
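
Note: this macro trick replaces the long hand-written member list deleted
below. ARMGenSubtargetInfo.inc, produced by TableGen from ARM.td, re-invokes
GET_SUBTARGETINFO_MACRO once per SubtargetFeature, so the include expands to
lines of the following form (member names taken from the removed code; a
matching GETTER invocation later in the file generates the accessors):

    // Illustrative excerpt of what the include expands to:
    bool HasV8Ops = false;
    bool HasMVEIntegerOps = false;
    bool HasFullFP16 = false;
    // ...one such member per feature defined in ARM.td.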
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
@@ -159,343 +164,22 @@ protected:
/// ARMArch - ARM architecture
ARMArchEnum ARMArch = ARMv4t;
- /// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
- /// Specify whether target support specific ARM ISA variants.
- bool HasV4TOps = false;
- bool HasV5TOps = false;
- bool HasV5TEOps = false;
- bool HasV6Ops = false;
- bool HasV6MOps = false;
- bool HasV6KOps = false;
- bool HasV6T2Ops = false;
- bool HasV7Ops = false;
- bool HasV8Ops = false;
- bool HasV8_1aOps = false;
- bool HasV8_2aOps = false;
- bool HasV8_3aOps = false;
- bool HasV8_4aOps = false;
- bool HasV8_5aOps = false;
- bool HasV8_6aOps = false;
- bool HasV8_8aOps = false;
- bool HasV8_7aOps = false;
- bool HasV9_0aOps = false;
- bool HasV9_1aOps = false;
- bool HasV9_2aOps = false;
- bool HasV9_3aOps = false;
- bool HasV8MBaselineOps = false;
- bool HasV8MMainlineOps = false;
- bool HasV8_1MMainlineOps = false;
- bool HasMVEIntegerOps = false;
- bool HasMVEFloatOps = false;
- bool HasCDEOps = false;
-
- /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
- /// floating point ISAs are supported.
- bool HasVFPv2 = false;
- bool HasVFPv3 = false;
- bool HasVFPv4 = false;
- bool HasFPARMv8 = false;
- bool HasNEON = false;
- bool HasFPRegs = false;
- bool HasFPRegs16 = false;
- bool HasFPRegs64 = false;
-
- /// Versions of the VFP flags restricted to single precision, or to
- /// 16 d-registers, or both.
- bool HasVFPv2SP = false;
- bool HasVFPv3SP = false;
- bool HasVFPv4SP = false;
- bool HasFPARMv8SP = false;
- bool HasVFPv3D16 = false;
- bool HasVFPv4D16 = false;
- bool HasFPARMv8D16 = false;
- bool HasVFPv3D16SP = false;
- bool HasVFPv4D16SP = false;
- bool HasFPARMv8D16SP = false;
-
- /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
- bool HasDotProd = false;
-
- /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
- /// specified. Use the method useNEONForSinglePrecisionFP() to
- /// determine if NEON should actually be used.
- bool UseNEONForSinglePrecisionFP = false;
-
/// UseMulOps - True if non-microcoded fused integer multiply-add and
/// multiply-subtract instructions should be used.
bool UseMulOps = false;
- /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
- /// whether the FP VML[AS] instructions are slow (if so, don't use them).
- bool SlowFPVMLx = false;
-
- /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates
- /// whether the FP VFM[AS] instructions are slow (if so, don't use them).
- bool SlowFPVFMx = false;
-
- /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
- /// forwarding to allow mul + mla being issued back to back.
- bool HasVMLxForwarding = false;
-
- /// SlowFPBrcc - True if floating point compare + branch is slow.
- bool SlowFPBrcc = false;
-
- /// InThumbMode - True if compiling for Thumb, false for ARM.
- bool InThumbMode = false;
-
- /// UseSoftFloat - True if we're using software floating point features.
- bool UseSoftFloat = false;
-
- /// UseMISched - True if MachineScheduler should be used for this subtarget.
- bool UseMISched = false;
-
- /// DisablePostRAScheduler - False if scheduling should happen again after
- /// register allocation.
- bool DisablePostRAScheduler = false;
-
- /// HasThumb2 - True if Thumb2 instructions are supported.
- bool HasThumb2 = false;
-
- /// NoARM - True if subtarget does not support ARM mode execution.
- bool NoARM = false;
-
- /// ReserveR9 - True if R9 is not available as a general purpose register.
- bool ReserveR9 = false;
-
- /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
- /// 32-bit imms (including global addresses).
- bool NoMovt = false;
-
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
bool SupportsTailCall = false;
- /// HasFP16 - True if subtarget supports half-precision FP conversions
- bool HasFP16 = false;
-
- /// HasFullFP16 - True if subtarget supports half-precision FP operations
- bool HasFullFP16 = false;
-
- /// HasFP16FML - True if subtarget supports half-precision FP fml operations
- bool HasFP16FML = false;
-
- /// HasBF16 - True if subtarget supports BFloat16 floating point operations
- bool HasBF16 = false;
-
- /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply
- bool HasMatMulInt8 = false;
-
- /// HasD32 - True if subtarget has the full 32 double precision
- /// FP registers for VFPv3.
- bool HasD32 = false;
-
- /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
- bool HasHardwareDivideInThumb = false;
-
- /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
- bool HasHardwareDivideInARM = false;
-
- /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
- /// instructions.
- bool HasDataBarrier = false;
-
- /// HasFullDataBarrier - True if the subtarget supports DFB data barrier
- /// instruction.
- bool HasFullDataBarrier = false;
-
- /// HasV7Clrex - True if the subtarget supports CLREX instructions
- bool HasV7Clrex = false;
-
- /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc)
- /// instructions
- bool HasAcquireRelease = false;
-
- /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
- /// over 16-bit ones.
- bool Pref32BitThumb = false;
-
- /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
- /// that partially update CPSR and add false dependency on the previous
- /// CPSR setting instruction.
- bool AvoidCPSRPartialUpdate = false;
-
- /// CheapPredicableCPSRDef - If true, disable +1 predication cost
- /// for instructions updating CPSR. Enabled for Cortex-A57.
- bool CheapPredicableCPSRDef = false;
-
- /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
- /// movs with shifter operand (i.e. asr, lsl, lsr).
- bool AvoidMOVsShifterOperand = false;
-
- /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should
- /// avoid issue "normal" call instructions to callees which do not return.
- bool HasRetAddrStack = false;
-
- /// HasBranchPredictor - True if the subtarget has a branch predictor. Having
- /// a branch predictor or not changes the expected cost of taking a branch
- /// which affects the choice of whether to use predicated instructions.
- bool HasBranchPredictor = true;
-
- /// HasMPExtension - True if the subtarget supports Multiprocessing
- /// extension (ARMv7 only).
- bool HasMPExtension = false;
-
- /// HasVirtualization - True if the subtarget supports the Virtualization
- /// extension.
- bool HasVirtualization = false;
-
- /// HasFP64 - If true, the floating point unit supports double
- /// precision.
- bool HasFP64 = false;
-
- /// If true, the processor supports the Performance Monitor Extensions. These
- /// include a generic cycle-counter as well as more fine-grained (often
- /// implementation-specific) events.
- bool HasPerfMon = false;
-
- /// HasTrustZone - if true, processor supports TrustZone security extensions
- bool HasTrustZone = false;
-
- /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions
- bool Has8MSecExt = false;
-
- /// HasSHA2 - if true, processor supports SHA1 and SHA256
- bool HasSHA2 = false;
-
- /// HasAES - if true, processor supports AES
- bool HasAES = false;
-
- /// HasCrypto - if true, processor supports Cryptography extensions
- bool HasCrypto = false;
-
- /// HasCRC - if true, processor supports CRC instructions
- bool HasCRC = false;
-
- /// HasRAS - if true, the processor supports RAS extensions
- bool HasRAS = false;
-
- /// HasLOB - if true, the processor supports the Low Overhead Branch extension
- bool HasLOB = false;
-
- bool HasPACBTI = false;
-
- /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
- /// particularly effective at zeroing a VFP register.
- bool HasZeroCycleZeroing = false;
-
- /// HasFPAO - if true, processor does positive address offset computation faster
- bool HasFPAO = false;
-
- /// HasFuseAES - if true, processor executes back to back AES instruction
- /// pairs faster.
- bool HasFuseAES = false;
-
- /// HasFuseLiterals - if true, processor executes back to back
- /// bottom and top halves of literal generation faster.
- bool HasFuseLiterals = false;
-
- /// If true, if conversion may decide to leave some instructions unpredicated.
- bool IsProfitableToUnpredicate = false;
-
- /// If true, VMOV will be favored over VGETLNi32.
- bool HasSlowVGETLNi32 = false;
-
- /// If true, VMOV will be favored over VDUP.
- bool HasSlowVDUP32 = false;
-
- /// If true, VMOVSR will be favored over VMOVDRR.
- bool PreferVMOVSR = false;
-
- /// If true, ISHST barriers will be used for Release semantics.
- bool PreferISHST = false;
-
- /// If true, a VLDM/VSTM starting with an odd register number is considered to
- /// take more microops than single VLDRS/VSTRS.
- bool SlowOddRegister = false;
-
- /// If true, loading into a D subregister will be penalized.
- bool SlowLoadDSubregister = false;
-
- /// If true, use a wider stride when allocating VFP registers.
- bool UseWideStrideVFP = false;
-
- /// If true, the AGU and NEON/FPU units are multiplexed.
- bool HasMuxedUnits = false;
-
- /// If true, VMOVS will never be widened to VMOVD.
- bool DontWidenVMOVS = false;
-
- /// If true, splat a register between VFP and NEON instructions.
- bool SplatVFPToNeon = false;
-
- /// If true, run the MLx expansion pass.
- bool ExpandMLx = false;
-
- /// If true, VFP/NEON VMLA/VMLS have special RAW hazards.
- bool HasVMLxHazards = false;
-
- // If true, read thread pointer from coprocessor register.
- bool ReadTPHard = false;
-
- /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
- bool UseNEONForFPMovs = false;
-
- /// If true, VLDn instructions take an extra cycle for unaligned accesses.
- bool CheckVLDnAlign = false;
-
- /// If true, VFP instructions are not pipelined.
- bool NonpipelinedVFP = false;
-
- /// StrictAlign - If true, the subtarget disallows unaligned memory
- /// accesses for some types. For details, see
- /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool StrictAlign = false;
-
- /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
- /// blocks to conform to ARMv8 rule.
+ /// RestrictIT - If true, the subtarget disallows generation of complex IT
+ /// blocks.
bool RestrictIT = false;
- /// HasDSP - If true, the subtarget supports the DSP (saturating arith
- /// and such) instructions.
- bool HasDSP = false;
-
- /// NaCl TRAP instruction is generated instead of the regular TRAP.
- bool UseNaClTrap = false;
-
- /// Generate calls via indirect call instructions.
- bool GenLongCalls = false;
-
- /// Generate code that does not contain data access to code sections.
- bool GenExecuteOnly = false;
-
- /// Target machine allowed unsafe FP math (such as use of NEON fp)
- bool UnsafeFPMath = false;
-
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
- /// Has speculation barrier
- bool HasSB = false;
-
- /// Implicitly convert an instruction to a different one if its immediates
- /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
- bool NegativeImmediates = true;
-
- /// Mitigate against the cve-2021-35465 security vulnurability.
- bool FixCMSE_CVE_2021_35465 = false;
-
- /// Harden against Straight Line Speculation for Returns and Indirect
- /// Branches.
- bool HardenSlsRetBr = false;
-
- /// Harden against Straight Line Speculation for indirect calls.
- bool HardenSlsBlr = false;
-
- /// Generate thunk code for SLS mitigation in the normal text section.
- bool HardenSlsNoComdat = false;
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
@@ -540,10 +224,6 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
- /// NoBTIAtReturnTwice - Don't place a BTI instruction after
- /// return-twice constructs (setjmp)
- bool NoBTIAtReturnTwice = false;
-
/// Options passed via command line that could influence the target
const TargetOptions &Options;
@@ -622,38 +302,12 @@ private:
std::bitset<8> CoprocCDE = {};
public:
- void computeIssueWidth();
+// Getters for SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "ARMGenSubtargetInfo.inc"
- bool hasV4TOps() const { return HasV4TOps; }
- bool hasV5TOps() const { return HasV5TOps; }
- bool hasV5TEOps() const { return HasV5TEOps; }
- bool hasV6Ops() const { return HasV6Ops; }
- bool hasV6MOps() const { return HasV6MOps; }
- bool hasV6KOps() const { return HasV6KOps; }
- bool hasV6T2Ops() const { return HasV6T2Ops; }
- bool hasV7Ops() const { return HasV7Ops; }
- bool hasV8Ops() const { return HasV8Ops; }
- bool hasV8_1aOps() const { return HasV8_1aOps; }
- bool hasV8_2aOps() const { return HasV8_2aOps; }
- bool hasV8_3aOps() const { return HasV8_3aOps; }
- bool hasV8_4aOps() const { return HasV8_4aOps; }
- bool hasV8_5aOps() const { return HasV8_5aOps; }
- bool hasV8_6aOps() const { return HasV8_6aOps; }
- bool hasV8_7aOps() const { return HasV8_7aOps; }
- bool hasV8_8aOps() const { return HasV8_8aOps; }
- bool hasV9_0aOps() const { return HasV9_0aOps; }
- bool hasV9_1aOps() const { return HasV9_1aOps; }
- bool hasV9_2aOps() const { return HasV9_2aOps; }
- bool hasV9_3aOps() const { return HasV9_3aOps; }
- bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
- bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
- bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
- bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
- bool hasMVEFloatOps() const { return HasMVEFloatOps; }
- bool hasCDEOps() const { return HasCDEOps; }
- bool hasFPRegs() const { return HasFPRegs; }
- bool hasFPRegs16() const { return HasFPRegs16; }
- bool hasFPRegs64() const { return HasFPRegs64; }
+ void computeIssueWidth();
/// @{
/// These functions are obsolete, please consider adding subtarget features
@@ -673,31 +327,14 @@ public:
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2Base() const { return HasVFPv2SP; }
- bool hasVFP3Base() const { return HasVFPv3D16SP; }
- bool hasVFP4Base() const { return HasVFPv4D16SP; }
- bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
- bool hasNEON() const { return HasNEON; }
- bool hasSHA2() const { return HasSHA2; }
- bool hasAES() const { return HasAES; }
- bool hasCrypto() const { return HasCrypto; }
- bool hasDotProd() const { return HasDotProd; }
- bool hasCRC() const { return HasCRC; }
- bool hasRAS() const { return HasRAS; }
- bool hasLOB() const { return HasLOB; }
- bool hasPACBTI() const { return HasPACBTI; }
- bool hasVirtualization() const { return HasVirtualization; }
-
bool useNEONForSinglePrecisionFP() const {
- return hasNEON() && UseNEONForSinglePrecisionFP;
+ return hasNEON() && hasNEONForFP();
}
- bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
- bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
- bool hasDataBarrier() const { return HasDataBarrier; }
- bool hasFullDataBarrier() const { return HasFullDataBarrier; }
- bool hasV7Clrex() const { return HasV7Clrex; }
- bool hasAcquireRelease() const { return HasAcquireRelease; }
+ bool hasVFP2Base() const { return hasVFPv2SP(); }
+ bool hasVFP3Base() const { return hasVFPv3D16SP(); }
+ bool hasVFP4Base() const { return hasVFPv4D16SP(); }
+ bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); }
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
@@ -710,43 +347,7 @@ public:
}
bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); }
bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); }
- bool hasVMLxForwarding() const { return HasVMLxForwarding; }
- bool isFPBrccSlow() const { return SlowFPBrcc; }
- bool hasFP64() const { return HasFP64; }
- bool hasPerfMon() const { return HasPerfMon; }
- bool hasTrustZone() const { return HasTrustZone; }
- bool has8MSecExt() const { return Has8MSecExt; }
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
- bool hasFPAO() const { return HasFPAO; }
- bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
- bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
- bool hasSlowVDUP32() const { return HasSlowVDUP32; }
- bool preferVMOVSR() const { return PreferVMOVSR; }
- bool preferISHSTBarriers() const { return PreferISHST; }
- bool expandMLx() const { return ExpandMLx; }
- bool hasVMLxHazards() const { return HasVMLxHazards; }
- bool hasSlowOddRegister() const { return SlowOddRegister; }
- bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
- bool useWideStrideVFP() const { return UseWideStrideVFP; }
- bool hasMuxedUnits() const { return HasMuxedUnits; }
- bool dontWidenVMOVS() const { return DontWidenVMOVS; }
- bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
- bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
- bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
- bool nonpipelinedVFP() const { return NonpipelinedVFP; }
- bool prefers32BitThumb() const { return Pref32BitThumb; }
- bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
- bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
- bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
- bool hasRetAddrStack() const { return HasRetAddrStack; }
- bool hasBranchPredictor() const { return HasBranchPredictor; }
- bool hasMPExtension() const { return HasMPExtension; }
- bool hasDSP() const { return HasDSP; }
- bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
- bool hasSB() const { return HasSB; }
- bool genLongCalls() const { return GenLongCalls; }
- bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasBaseDSP() const {
if (isThumb())
return hasDSP();
@@ -754,25 +355,16 @@ public:
return hasV5TEOps();
}
- bool hasFP16() const { return HasFP16; }
- bool hasD32() const { return HasD32; }
- bool hasFullFP16() const { return HasFullFP16; }
- bool hasFP16FML() const { return HasFP16FML; }
- bool hasBF16() const { return HasBF16; }
-
- bool hasFuseAES() const { return HasFuseAES; }
- bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
- bool hasMatMulInt8() const { return HasMatMulInt8; }
-
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
+ bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -825,24 +417,21 @@ public:
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
- bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
- bool useSoftFloat() const { return UseSoftFloat; }
- bool isThumb() const { return InThumbMode; }
+ bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
- bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
- bool isThumb2() const { return InThumbMode && HasThumb2; }
- bool hasThumb2() const { return HasThumb2; }
+ bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
+ bool isThumb2() const { return isThumb() && hasThumb2(); }
bool isMClass() const { return ARMProcClass == MClass; }
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
- bool isReadTPHard() const { return ReadTPHard; }
bool isR9Reserved() const {
return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
MCPhysReg getFramePointerReg() const {
- if (isTargetDarwin() || (!isTargetWindows() && isThumb()))
+ if (isTargetDarwin() ||
+ (!isTargetWindows() && isThumb() && !createAAPCSFrameChain()))
return ARM::R7;
return ARM::R11;
}
@@ -859,6 +448,8 @@ public:
isThumb1Only();
}
+ bool splitFramePointerPush(const MachineFunction &MF) const;
+
bool useStride4VFPs() const;
bool useMovt() const;
@@ -878,6 +469,10 @@ public:
/// Returns true if machine scheduler should be enabled.
bool enableMachineScheduler() const override;
+ /// Returns true if machine pipeliner should be enabled.
+ bool enableMachinePipeliner() const override;
+ bool useDFAforSMS() const override;
+
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
@@ -891,9 +486,6 @@ public:
/// scheduling, DAGCombine, etc.).
bool useAA() const override { return true; }
- // enableAtomicExpand- True if we need to expand our atomics.
- bool enableAtomicExpand() const override;
-
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData *getInstrItineraryData() const override {
@@ -956,14 +548,6 @@ public:
bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
-
- bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; }
-
- bool hardenSlsRetBr() const { return HardenSlsRetBr; }
- bool hardenSlsBlr() const { return HardenSlsBlr; }
- bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
-
- bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index c38970f8e341..d95c21d6504b 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -30,20 +31,20 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -106,6 +107,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeMVEGatherScatterLoweringPass(Registry);
initializeARMSLSHardeningPass(Registry);
initializeMVELaneInterleavingPass(Registry);
+ initializeARMFixCortexA57AES1742098Pass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -194,7 +196,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
+ if (!RM)
// Default relocation model on Darwin is PIC.
return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
@@ -307,7 +309,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
+ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(ARMTTIImpl(this, F));
}
@@ -434,6 +436,9 @@ void ARMPassConfig::addIRPasses() {
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
+
+ if (TM->Options.JMCInstrument)
+ addPass(createJMCInstrumenterPass());
}
void ARMPassConfig::addCodeGenPrepare() {
@@ -505,6 +510,9 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(&MachinePipelinerID);
+
addPass(createMVETPAndVPTOptimisationsPass());
addPass(createMLxExpansionPass());
@@ -573,8 +581,20 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ // Inserts fixup instructions before unsafe AES operations. Instructions may
+ // be inserted at the start of blocks and within blocks, so this pass has to
+ // come before those below.
+ addPass(createARMFixCortexA57AES1742098Pass());
+ // Inserts BTIs at the start of functions and indirectly-called basic blocks,
+ // so passes cannot add to the start of basic blocks once this has run.
addPass(createARMBranchTargetsPass());
+ // Inserts Constant Islands. Block sizes cannot be increased after this point,
+ // as this may push branch ranges and constant-pool load offsets out of range.
addPass(createARMConstantIslandPass());
+ // Finalises Low-Overhead Loops. This replaces pseudo instructions with real
+ // instructions, but the pseudos all have conservative sizes so that block
+ // sizes will only be decreased by this pass.
addPass(createARMLowOverheadLoopsPass());
if (TM->getTargetTriple().isOSWindows()) {
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8428092bf179..8d33a038deeb 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -52,7 +52,7 @@ public:
const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d9d563ead260..3a9946ee810b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1202,7 +1202,8 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
- int Index, VectorType *SubTp) {
+ int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
@@ -1290,7 +1291,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (!Mask.empty()) {
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (Mask.size() <= LT.second.getVectorNumElements() &&
+ if (LT.second.isVector() &&
+ Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
@@ -1764,6 +1766,48 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat: {
+ if (ICA.getArgTypes().empty())
+ break;
+ bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+ auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+ EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
+ // Check for the legal types, with the correct subtarget features.
+ if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
+ (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
+ (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
+ return LT.first;
+
+ // Equally for MVE vector types
+ if (ST->hasMVEFloatOps() &&
+ (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
+ LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())
+ return LT.first * ST->getMVEVectorCostFactor(CostKind);
+
+ // Otherwise we use a legal convert followed by a min+max
+ if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
+ (ST->hasFP64() && LT.second == MVT::f64) ||
+ (ST->hasFullFP16() && LT.second == MVT::f16) ||
+ (ST->hasMVEFloatOps() &&
+ (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
+ LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
+ Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(),
+ LT.second.getScalarSizeInBits());
+ InstructionCost Cost =
+ LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
+ IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
+ : Intrinsic::umin,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs1, CostKind);
+ IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
+ : Intrinsic::umax,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+ return LT.first * Cost;
+ }
+ break;
+ }
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
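A minimal standalone sketch (not part of this patch) of the lowering the fallback path above prices: one legal convert plus an smin and an smax clamp on the wider legal integer type. The float is pre-clamped so the C++ cast stays well defined, and NaN is mapped to 0 as the intrinsic's semantics require.

#include <algorithm>
#include <cstdint>
#include <cstdio>

static int8_t fptosi_sat_i8(float F) {
  if (F != F)
    return 0; // fptosi.sat maps NaN to zero
  // Keep the cast to the legal i32 type in range (2147483520 = 2^31 - 128).
  F = std::min(std::max(F, -2147483648.0f), 2147483520.0f);
  int32_t Wide = static_cast<int32_t>(F);   // the single legal convert
  Wide = std::min<int32_t>(Wide, INT8_MAX); // the smin the cost model adds
  Wide = std::max<int32_t>(Wide, INT8_MIN); // ...and the smax
  return static_cast<int8_t>(Wide);
}

int main() { std::printf("%d\n", fptosi_sat_i8(1000.0f)); } // prints 127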
@@ -1771,7 +1815,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
- BaseT::isLoweredToCall(F);
+ return BaseT::isLoweredToCall(F);
// Assume all Arm-specific intrinsics map to an instruction.
if (F->getName().startswith("llvm.arm"))
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 5bb84899e5ef..d7a2bdb3db15 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -213,7 +213,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c7734cc2cf11..b725ea3a84e5 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -453,6 +453,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool AllowRAAC = false);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
+ bool parseImmExpr(int64_t &Out);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
unsigned &ShiftAmount);
@@ -488,6 +489,17 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveAlign(SMLoc L);
bool parseDirectiveThumbSet(SMLoc L);
+ bool parseDirectiveSEHAllocStack(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveRegs(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveSP(SMLoc L);
+ bool parseDirectiveSEHSaveFRegs(SMLoc L);
+ bool parseDirectiveSEHSaveLR(SMLoc L);
+ bool parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment);
+ bool parseDirectiveSEHNop(SMLoc L, bool Wide);
+ bool parseDirectiveSEHEpilogStart(SMLoc L, bool Condition);
+ bool parseDirectiveSEHEpilogEnd(SMLoc L);
+ bool parseDirectiveSEHCustom(SMLoc L);
+
bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken);
StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken,
unsigned &PredicationCode,
@@ -4528,9 +4540,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
if (Reg == EndReg)
continue;
// The register must be in the same register class as the first.
- if ((Reg == ARM::RA_AUTH_CODE &&
- RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
- (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg)))
+ if (!RC->contains(Reg))
return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
@@ -6319,6 +6329,18 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
}
+bool ARMAsmParser::parseImmExpr(int64_t &Out) {
+ const MCExpr *Expr = nullptr;
+ SMLoc L = getParser().getTok().getLoc();
+ if (check(getParser().parseExpression(Expr), L, "expected expression"))
+ return true;
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (check(!Value, L, "expected constant expression"))
+ return true;
+ Out = Value->getValue();
+ return false;
+}
+
// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
// :lower16: and :upper16:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
@@ -6379,7 +6401,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
CurrentFormat = WASM;
break;
case MCContext::IsGOFF:
+ case MCContext::IsSPIRV:
case MCContext::IsXCOFF:
+ case MCContext::IsDXContainer:
llvm_unreachable("unexpected object format");
break;
}
@@ -10958,9 +10982,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
- { // processInstruction() updates inITBlock state, we need to save it away
- bool wasInITBlock = inITBlock();
-
+ {
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other. E.g.,
@@ -10969,12 +10991,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
LLVM_DEBUG(dbgs() << "Changed to: ";
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
dbgs() << "\n");
-
- // Only after the instruction is fully processed, we can validate it
- if (wasInITBlock && hasV8Ops() && isThumb() &&
- !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) {
- Warning(IDLoc, "deprecated instruction in IT block");
- }
}
// Only move forward at the very end so that everything in validate
@@ -11090,6 +11106,39 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveTLSDescSeq(DirectiveID.getLoc());
else
return true;
+ } else if (IsCOFF) {
+ if (IDVal == ".seh_stackalloc")
+ parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_stackalloc_w")
+ parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_save_regs")
+ parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_save_regs_w")
+ parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_save_sp")
+ parseDirectiveSEHSaveSP(DirectiveID.getLoc());
+ else if (IDVal == ".seh_save_fregs")
+ parseDirectiveSEHSaveFRegs(DirectiveID.getLoc());
+ else if (IDVal == ".seh_save_lr")
+ parseDirectiveSEHSaveLR(DirectiveID.getLoc());
+ else if (IDVal == ".seh_endprologue")
+ parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/false);
+ else if (IDVal == ".seh_endprologue_fragment")
+ parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/true);
+ else if (IDVal == ".seh_nop")
+ parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_nop_w")
+ parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_startepilogue")
+ parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/false);
+ else if (IDVal == ".seh_startepilogue_cond")
+ parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/true);
+ else if (IDVal == ".seh_endepilogue")
+ parseDirectiveSEHEpilogEnd(DirectiveID.getLoc());
+ else if (IDVal == ".seh_custom")
+ parseDirectiveSEHCustom(DirectiveID.getLoc());
+ else
+ return true;
} else
return true;
return false;
@@ -11113,8 +11162,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
- check(!hasThumb(), L, "target does not support Thumb mode"))
+ if (parseEOL() || check(!hasThumb(), L, "target does not support Thumb mode"))
return true;
if (!isThumb())
@@ -11127,8 +11175,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
- check(!hasARM(), L, "target does not support ARM mode"))
+ if (parseEOL() || check(!hasARM(), L, "target does not support ARM mode"))
return true;
if (isThumb())
@@ -11167,15 +11214,13 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
Parser.getTok().getIdentifier());
getParser().getStreamer().emitThumbFunc(Func);
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.thumb_func' directive"))
+ if (parseEOL())
return true;
return false;
}
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.thumb_func' directive"))
+ if (parseEOL())
return true;
// .thumb_func implies .thumb
@@ -11204,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
"'.syntax divided' arm assembly not supported") ||
check(Mode != "unified" && Mode != "UNIFIED", L,
"unrecognized syntax mode in .syntax directive") ||
- parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ parseEOL())
return true;
// TODO tell the MC streamer the mode
@@ -11226,7 +11271,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
}
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
if (Val == 16) {
@@ -11257,8 +11302,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
SMLoc SRegLoc, ERegLoc;
if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc,
"register name expected") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected input in .req directive."))
+ parseEOL())
return true;
if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg)
@@ -11276,10 +11320,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
return Error(L, "unexpected input in .unreq directive.");
RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
Parser.Lex(); // Eat the identifier.
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected input in '.unreq' directive"))
- return true;
- return false;
+ return parseEOL();
}
// After changing arch/CPU, try to put the ARM/Thumb mode back to what it was
@@ -11340,11 +11381,11 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
StringRef Name = Parser.getTok().getIdentifier();
Optional<unsigned> Ret = ELFAttrs::attrTypeFromString(
Name, ARMBuildAttrs::getARMAttributeTags());
- if (!Ret.hasValue()) {
+ if (!Ret) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
- Tag = Ret.getValue();
+ Tag = *Ret;
Parser.Lex();
} else {
const MCExpr *AttrExpr;
@@ -11406,8 +11447,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
Parser.Lex();
}
- if (Parser.parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.eabi_attribute' directive"))
+ if (Parser.parseEOL())
return true;
if (IsIntegerValue && IsStringValue) {
@@ -11463,8 +11503,7 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.fnstart' directive"))
+ if (parseEOL())
return true;
if (UC.hasFnStart()) {
@@ -11485,8 +11524,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
/// parseDirectiveFnEnd
/// ::= .fnend
bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.fnend' directive"))
+ if (parseEOL())
return true;
// Check the ordering of unwind directives
if (!UC.hasFnStart())
@@ -11502,8 +11540,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
/// parseDirectiveCantUnwind
/// ::= .cantunwind
bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cantunwind' directive"))
+ if (parseEOL())
return true;
UC.recordCantUnwind(L);
@@ -11538,8 +11575,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
StringRef Name(Parser.getTok().getIdentifier());
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.personality' directive"))
+ if (parseEOL())
return true;
UC.recordPersonality(L);
@@ -11571,8 +11607,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
/// parseDirectiveHandlerData
/// ::= .handlerdata
bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.handlerdata' directive"))
+ if (parseEOL())
return true;
UC.recordHandlerData(L);
@@ -11670,8 +11705,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
if (!CE)
return Error(ExLoc, "pad offset must be an immediate");
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.pad' directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitPad(CE->getValue());
@@ -11692,8 +11726,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(Operands, true, true) ||
- parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseRegisterList(Operands, true, true) || parseEOL())
return true;
ARMOperand &Op = (ARMOperand &)*Operands[0];
if (!IsVector && !Op.isRegList())
@@ -11776,7 +11809,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
@@ -11785,7 +11818,7 @@ bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
if (!Section) {
@@ -11794,7 +11827,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
getStreamer().emitCodeAlignment(2, &getSTI());
else
getStreamer().emitValueToAlignment(2);
@@ -11810,9 +11843,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
const MCExpr *IndexExpression;
SMLoc IndexLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(IndexExpression) ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.personalityindex' directive")) {
+ if (Parser.parseExpression(IndexExpression) || parseEOL()) {
return true;
}
@@ -11913,11 +11944,10 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.tlsdescseq' directive"))
+ if (parseEOL())
return true;
- getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
+ getTargetStreamer().annotateTLSDescriptorSequence(SRE);
return false;
}
@@ -11955,8 +11985,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
Offset = CE->getValue();
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.movsp' directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitMovSP(SPReg, Offset);
@@ -11996,7 +12025,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// '.align' is target specifically handled to mean 2**2 byte alignment.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
getStreamer().emitCodeAlignment(4, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(4, 0, 1, 0);
@@ -12026,6 +12055,175 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
return false;
}
+/// parseDirectiveSEHAllocStack
+/// ::= .seh_stackalloc
+/// ::= .seh_stackalloc_w
+bool ARMAsmParser::parseDirectiveSEHAllocStack(SMLoc L, bool Wide) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().emitARMWinCFIAllocStack(Size, Wide);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegs
+/// ::= .seh_save_regs
+/// ::= .seh_save_regs_w
+bool ARMAsmParser::parseDirectiveSEHSaveRegs(SMLoc L, bool Wide) {
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+ if (parseRegisterList(Operands) || parseEOL())
+ return true;
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!Op.isRegList())
+ return Error(L, ".seh_save_regs{_w} expects GPR registers");
+ const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+ uint32_t Mask = 0;
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ if (Reg == 15) // pc -> lr
+ Reg = 14;
+ if (Reg == 13)
+ return Error(L, ".seh_save_regs{_w} can't include SP");
+ assert(Reg < 16U && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ Mask |= Bit;
+ }
+ if (!Wide && (Mask & 0x1f00) != 0)
+ return Error(L,
+ ".seh_save_regs cannot save R8-R12, needs .seh_save_regs_w");
+ getTargetStreamer().emitARMWinCFISaveRegMask(Mask, Wide);
+ return false;
+}
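As a worked example (not part of this patch), the mask the loop above would build for a plausible ".seh_save_regs {r4-r7, lr}", with the register encodings written out by hand; pc has already been remapped to lr at this point.

#include <cstdio>

int main() {
  const unsigned Encodings[] = {4, 5, 6, 7, 14}; // r4-r7 and lr
  unsigned Mask = 0;
  for (unsigned Reg : Encodings)
    Mask |= 1u << Reg;
  bool NeedsWide = (Mask & 0x1f00) != 0; // any of r8-r12 forces the _w form
  std::printf("mask=0x%x wide=%d\n", Mask, NeedsWide); // mask=0x40f0 wide=0
}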
+
+/// parseDirectiveSEHSaveSP
+/// ::= .seh_save_sp
+bool ARMAsmParser::parseDirectiveSEHSaveSP(SMLoc L) {
+ int Reg = tryParseRegister();
+ if (Reg == -1 || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg))
+ return Error(L, "expected GPR");
+ unsigned Index = MRI->getEncodingValue(Reg);
+ if (Index > 14 || Index == 13)
+ return Error(L, "invalid register for .seh_save_sp");
+ getTargetStreamer().emitARMWinCFISaveSP(Index);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegs
+/// ::= .seh_save_fregs
+bool ARMAsmParser::parseDirectiveSEHSaveFRegs(SMLoc L) {
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+ if (parseRegisterList(Operands) || parseEOL())
+ return true;
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!Op.isDPRRegList())
+ return Error(L, ".seh_save_fregs expects DPR registers");
+ const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+ uint32_t Mask = 0;
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ assert(Reg < 32U && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ Mask |= Bit;
+ }
+
+ if (Mask == 0)
+ return Error(L, ".seh_save_fregs missing registers");
+
+ unsigned First = 0;
+ while ((Mask & 1) == 0) {
+ First++;
+ Mask >>= 1;
+ }
+ if (((Mask + 1) & Mask) != 0)
+ return Error(L,
+ ".seh_save_fregs must take a contiguous range of registers");
+ unsigned Last = First;
+ while ((Mask & 2) != 0) {
+ Last++;
+ Mask >>= 1;
+ }
+ if (First < 16 && Last >= 16)
+ return Error(L, ".seh_save_fregs must be all d0-d15 or d16-d31");
+ getTargetStreamer().emitARMWinCFISaveFRegs(First, Last);
+ return false;
+}
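A small aside (not part of this patch) on the ((Mask + 1) & Mask) check above: the preceding loop shifts Mask until bit 0 is set, and a contiguous run of ones starting at bit 0 is exactly one less than a power of two.

#include <cassert>

static bool contiguousFromBit0(unsigned Mask) {
  return ((Mask + 1) & Mask) == 0;
}

int main() {
  assert(contiguousFromBit0(0x7));  // 0b0111 + 1 = 0b1000, no overlap
  assert(!contiguousFromBit0(0x5)); // 0b0101 + 1 = 0b0110, bit 2 overlaps
  return 0;
}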
+
+/// parseDirectiveSEHSaveLR
+/// ::= .seh_save_lr
+bool ARMAsmParser::parseDirectiveSEHSaveLR(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().emitARMWinCFISaveLR(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHPrologEnd
+/// ::= .seh_endprologue
+/// ::= .seh_endprologue_fragment
+bool ARMAsmParser::parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment) {
+ getTargetStreamer().emitARMWinCFIPrologEnd(Fragment);
+ return false;
+}
+
+/// parseDirectiveSEHNop
+/// ::= .seh_nop
+/// ::= .seh_nop_w
+bool ARMAsmParser::parseDirectiveSEHNop(SMLoc L, bool Wide) {
+ getTargetStreamer().emitARMWinCFINop(Wide);
+ return false;
+}
+
+/// parseDirectiveSEHEpilogStart
+/// ::= .seh_startepilogue
+/// ::= .seh_startepilogue_cond
+bool ARMAsmParser::parseDirectiveSEHEpilogStart(SMLoc L, bool Condition) {
+ unsigned CC = ARMCC::AL;
+ if (Condition) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return Error(S, ".seh_startepilogue_cond missing condition");
+ CC = ARMCondCodeFromString(Tok.getString());
+ if (CC == ~0U)
+ return Error(S, "invalid condition");
+ Parser.Lex(); // Eat the token.
+ }
+
+ getTargetStreamer().emitARMWinCFIEpilogStart(CC);
+ return false;
+}
+
+/// parseDirectiveSEHEpilogEnd
+/// ::= .seh_endepilogue
+bool ARMAsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
+ getTargetStreamer().emitARMWinCFIEpilogEnd();
+ return false;
+}
+
+/// parseDirectiveSEHCustom
+/// ::= .seh_custom
+bool ARMAsmParser::parseDirectiveSEHCustom(SMLoc L) {
+ unsigned Opcode = 0;
+ do {
+ int64_t Byte;
+ if (parseImmExpr(Byte))
+ return true;
+ if (Byte > 0xff || Byte < 0)
+ return Error(L, "Invalid byte value in .seh_custom");
+ if (Opcode > 0x00ffffff)
+ return Error(L, "Too many bytes in .seh_custom");
+ // Store the bytes as one big-endian number in Opcode. In a multi-byte
+ // opcode sequence, the first byte can't be zero.
+ Opcode = (Opcode << 8) | Byte;
+ } while (parseOptionalToken(AsmToken::Comma));
+ getTargetStreamer().emitARMWinCFICustom(Opcode);
+ return false;
+}
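As a worked example (not part of this patch), packing a plausible ".seh_custom 0xeb, 0x01" the way the accumulation loop above does:

#include <cstdio>

int main() {
  const int Bytes[] = {0xeb, 0x01}; // assumed directive operands
  unsigned Opcode = 0;
  for (int B : Bytes)
    Opcode = (Opcode << 8) | B; // same big-endian packing as the parser
  std::printf("0x%x\n", Opcode); // prints 0xeb01
}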
+
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
@@ -12338,8 +12536,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
SMLoc ExtLoc = Parser.getTok().getLoc();
Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.arch_extension' directive"))
+ if (parseEOL())
return true;
if (Name == "nocrypto") {
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index c3df7dc88d79..9acd49292268 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -13,8 +13,8 @@
#include "TargetInfo/ARMTargetInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -175,408 +175,529 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithZRnospRegisterClass(
- MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
- unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
- unsigned Insn,
- uint64_t Adddress,
- const void *Decoder);
+static DecodeStatus
+DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Adddress,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
-static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
-static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
-static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift, int WriteBack>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift, int WriteBack>
static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-template<bool Writeback>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+template <bool Writeback>
static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
-template<int shift>
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<unsigned MinLog, unsigned MaxLog>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <unsigned MinLog, unsigned MaxLog>
static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
-template<unsigned start>
-static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+template <unsigned start>
+static DecodeStatus
+DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<bool scalar, OperandDecoder predicate_decoder>
-static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
#include "ARMGenDisassemblerTables.inc"
@@ -710,11 +831,12 @@ extern const MCInstrDesc ARMInsts[];
/// operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
bool isBranch, uint64_t InstSize,
- MCInst &MI, const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ MCInst &MI,
+ const MCDisassembler *Decoder) {
// FIXME: Does it make sense for value to be negative?
- return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
- /* Offset */ 0, InstSize);
+ return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
+ isBranch, /*Offset=*/0, /*OpSize=*/0,
+ InstSize);
}
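// Caller-side sketch (illustrative; mirrors the Thumb branch decoders later
// in this file): a branch decoder forwards the typed MCDisassembler and only
// materializes a plain immediate when no symbol could be attached, e.g.
//
//   if (!tryAddingSymbolicOperand(Address, Address + Imm + 4,
//                                 /*isBranch=*/true, /*InstSize=*/2,
//                                 Inst, Decoder))
//     Inst.addOperand(MCOperand::createImm(Imm));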
/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
@@ -727,7 +849,6 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
-  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
-  Dis->tryAddingPcLoadReferenceComment(Value, Address);
+  Decoder->tryAddingPcLoadReferenceComment(Value, Address);
}
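// Illustrative use: for a PC-relative load such as "ldr r0, [pc, #16]" in ARM
// state, the instruction decoder computes the literal pool address
// (Address + 16 + 8) and passes it here so the symbolizer can annotate what
// the load actually references.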
@@ -1142,7 +1264,8 @@ static const uint16_t CLRMGPRDecoderTable[] = {
};
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -1153,7 +1276,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -1165,9 +1288,9 @@ static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1180,7 +1303,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 13)
@@ -1192,8 +1315,8 @@ static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1207,8 +1330,8 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1225,8 +1348,8 @@ DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 13)
return MCDisassembler::Fail;
@@ -1235,7 +1358,8 @@ DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -1247,7 +1371,8 @@ static const uint16_t GPRPairDecoderTable[] = {
};
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// According to the Arm ARM RegNo = 14 is undefined, but we return fail
@@ -1263,8 +1388,9 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
-static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 13)
return MCDisassembler::Fail;
@@ -1278,7 +1404,7 @@ static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo != 13)
return MCDisassembler::Fail;
@@ -1288,7 +1414,8 @@ static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Register = 0;
switch (RegNo) {
case 0:
@@ -1318,7 +1445,8 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
const FeatureBitset &featureBits =
@@ -1343,7 +1471,8 @@ static const uint16_t SPRDecoderTable[] = {
};
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1353,7 +1482,8 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
}
@@ -1369,7 +1499,8 @@ static const uint16_t DPRDecoderTable[] = {
};
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
-    ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+      Decoder->getSubtargetInfo().getFeatureBits();
@@ -1384,22 +1515,24 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus
-DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -1413,7 +1546,8 @@ static const uint16_t QPRDecoderTable[] = {
};
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31 || (RegNo & 1) != 0)
return MCDisassembler::Fail;
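  // Q registers are encoded through their even-numbered D alias (Q<n>
  // overlaps D<2n> and D<2n+1>), so e.g. Val == 6 decodes to Q3.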
RegNo >>= 1;
@@ -1433,7 +1567,8 @@ static const uint16_t DPairDecoderTable[] = {
};
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 30)
return MCDisassembler::Fail;
@@ -1453,10 +1588,9 @@ static const uint16_t DPairSpacedDecoderTable[] = {
ARM::D28_D30, ARM::D29_D31
};
-static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 29)
return MCDisassembler::Fail;
@@ -1466,7 +1600,8 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
}
static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
@@ -1483,7 +1618,8 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val)
Inst.addOperand(MCOperand::createReg(ARM::CPSR));
else
@@ -1492,7 +1628,8 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -1529,7 +1666,8 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -1564,7 +1702,8 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
bool NeedDisjointWriteback = false;
@@ -1611,7 +1750,8 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
@@ -1635,7 +1775,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
@@ -1660,7 +1801,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// This operand encodes a mask of contiguous zeros between a specified MSB
// and LSB. To decode it, we create the mask of all bits MSB-and-lower,
// the mask of all bits LSB-and-lower, and then xor them to create
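  // the all-ones mask on [msb, lsb]; complementing that gives the operand.
  // Worked example (assuming the usual construction msb_mask =
  // (1 << (msb + 1)) - 1 and lsb_mask = (1 << lsb) - 1): msb = 5, lsb = 2:
  //   msb_mask            = 0x0000003f
  //   lsb_mask            = 0x00000003
  //   msb_mask ^ lsb_mask = 0x0000003c   (ones on [5, 2])
  //   ~(msb_mask ^ lsb_mask) = 0xffffffc3 (zeros on [5, 2], the operand)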
@@ -1687,7 +1829,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 28, 4);
@@ -1865,8 +2008,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -1971,7 +2114,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 13, 4);
@@ -2013,9 +2157,22 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus
-DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (Inst.getOpcode() != ARM::TSB && Inst.getOpcode() != ARM::t2TSB)
+ return MCDisassembler::Fail;
+
+ // The "csync" operand is not encoded into the "tsb" instruction (as this is
+ // the only available operand), but LLVM expects the instruction to have one
+ // operand, so we need to add the csync when decoding.
+ Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC));
+ return MCDisassembler::Success;
+}
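+// Illustrative round trip: the encoding of "tsb csync" carries no operand
+// bits, so decoding produces an MCInst whose single ARM_TSB::CSYNC immediate
+// lets the printer render the mandatory "csync" back out.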
+
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -2206,7 +2363,8 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -2235,7 +2393,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -2257,9 +2416,10 @@ static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -2350,7 +2510,8 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
// Check for UNPREDICTABLE predicated ESB instruction
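// (ESB is the hint with imm8 == 16; the Arm ARM makes a predicated ESB
// CONSTRAINED UNPREDICTABLE, hence the extra check below.)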
static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned pred = fieldFromInstruction(Insn, 28, 4);
unsigned imm8 = fieldFromInstruction(Insn, 0, 8);
-  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+  const MCDisassembler *Dis = Decoder;
@@ -2372,7 +2533,8 @@ static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 18, 2);
unsigned M = fieldFromInstruction(Insn, 17, 1);
unsigned iflags = fieldFromInstruction(Insn, 6, 3);
@@ -2419,7 +2581,8 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 9, 2);
unsigned M = fieldFromInstruction(Insn, 8, 1);
unsigned iflags = fieldFromInstruction(Insn, 5, 3);
@@ -2460,9 +2623,9 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = fieldFromInstruction(Insn, 0, 8);
unsigned Opcode = ARM::t2HINT;
@@ -2486,7 +2649,8 @@ static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 8, 4);
@@ -2510,7 +2674,8 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -2537,7 +2702,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 16, 4);
@@ -2565,7 +2731,8 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Pred = fieldFromInstruction(Insn, 28, 4);
@@ -2586,7 +2753,8 @@ static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Imm = fieldFromInstruction(Insn, 9, 1);
@@ -2614,7 +2782,8 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned add = fieldFromInstruction(Val, 12, 1);
@@ -2634,7 +2803,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -2654,7 +2824,8 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -2674,13 +2845,14 @@ static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
}
-static DecodeStatus
-DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus Status = MCDisassembler::Success;
// Note the J1 and J2 values are from the encoded instruction. So here
@@ -2705,9 +2877,9 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
return Status;
}
-static DecodeStatus
-DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 28, 4);
@@ -2736,7 +2908,8 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -2753,7 +2926,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3029,7 +3203,8 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned type = fieldFromInstruction(Insn, 8, 4);
unsigned align = fieldFromInstruction(Insn, 4, 2);
if (type == 6 && (align & 2)) return MCDisassembler::Fail;
@@ -3042,7 +3217,8 @@ static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3057,7 +3233,8 @@ static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3070,7 +3247,8 @@ static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3080,7 +3258,8 @@ static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3350,7 +3529,8 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3397,7 +3577,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3445,7 +3626,8 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3480,7 +3662,8 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3531,9 +3714,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3577,9 +3760,9 @@ DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
@@ -3607,7 +3790,8 @@ DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = fieldFromInstruction(Insn, 13, 3);
@@ -3632,7 +3816,8 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3651,31 +3836,36 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(8 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(16 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(32 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(64 - Val));
return MCDisassembler::Success;
}
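// The four DecodeShiftRight<N>Imm decoders above recover a NEON shift-right
// amount that the encoding stores as (element size - shift); e.g. Val == 5 in
// the 8-bit variant decodes to an immediate of 8 - 5 == 3.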
static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3711,7 +3901,8 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned dst = fieldFromInstruction(Insn, 8, 3);
@@ -3735,7 +3926,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1)));
@@ -3743,7 +3935,8 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val)));
@@ -3751,7 +3944,8 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(Val << 1));
@@ -3759,7 +3953,8 @@ static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 0, 3);
@@ -3774,7 +3969,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 0, 3);
@@ -3788,7 +3984,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = Val << 2;
Inst.addOperand(MCOperand::createImm(imm));
@@ -3798,7 +3995,8 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createReg(ARM::SP));
Inst.addOperand(MCOperand::createImm(Val));
@@ -3806,7 +4004,8 @@ static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 6, 4);
@@ -3835,7 +4034,8 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -3918,7 +4118,8 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4002,7 +4203,8 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4081,8 +4283,8 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4121,7 +4323,8 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4173,8 +4376,8 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
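  // Val == 0 encodes a negative-zero offset ("#-0"); it is represented with
  // an INT32_MIN sentinel so it stays distinct from a plain "#0".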
if (Val == 0)
Inst.addOperand(MCOperand::createImm(INT32_MIN));
else {
@@ -4188,7 +4391,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Val == 0)
Inst.addOperand(MCOperand::createImm(INT32_MIN));
else {
@@ -4203,7 +4406,8 @@ static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
}
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -4219,7 +4423,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4233,8 +4437,9 @@ static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4248,8 +4453,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
return S;
}
-static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
int imm = Val & 0xFF;
if (Val == 0)
imm = INT32_MIN;
@@ -4260,9 +4465,9 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-template<int shift>
-static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+template <int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
int imm = Val & 0x7F;
if (Val == 0)
imm = INT32_MIN;
@@ -4276,7 +4481,8 @@ static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -4321,10 +4527,10 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
return S;
}
-template<int shift>
+template <int shift>
static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 3);
@@ -4338,10 +4544,10 @@ static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
return S;
}
-template<int shift, int WriteBack>
+template <int shift, int WriteBack>
static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4358,7 +4564,8 @@ static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4419,7 +4626,8 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 13, 4);
@@ -4445,7 +4653,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = fieldFromInstruction(Insn, 0, 7);
Inst.addOperand(MCOperand::createReg(ARM::SP));
@@ -4456,7 +4665,8 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::tADDrSP) {
@@ -4481,7 +4691,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction(Insn, 0, 3);
@@ -4492,7 +4703,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Insn, 0, 4);
unsigned add = fieldFromInstruction(Insn, 4, 1);
@@ -4505,7 +4717,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 3, 4);
unsigned Qm = fieldFromInstruction(Insn, 0, 3);
@@ -4518,9 +4731,10 @@ static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
return S;
}
-template<int shift>
+template <int shift>
static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qm = fieldFromInstruction(Insn, 8, 3);
int imm = fieldFromInstruction(Insn, 0, 7);
@@ -4542,7 +4756,8 @@ static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Val is passed in as S:J1:J2:imm10H:imm10L:'0'
  // Note only one trailing zero, not two. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
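  //   I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
  // (the Arm ARM rule for Thumb-2 BL/BLX immediates); the 25-bit offset is
  // then S:I1:I2:imm10H:imm10L:'00', taken from the 4-byte-aligned PC.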
@@ -4566,7 +4781,8 @@ static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
@@ -4580,9 +4796,9 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &FeatureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
@@ -4598,9 +4814,9 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 22, 4);
@@ -4641,8 +4857,8 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
// Decode a shifted immediate operand. These basically consist
// of an 8-bit value, and a 4-bit directive that specifies either
// a splat operation or a rotation.
-static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned ctrl = fieldFromInstruction(Val, 10, 2);
if (ctrl == 0) {
unsigned byte = fieldFromInstruction(Val, 8, 2);
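(A hedged illustration of that 8-bit-value-plus-directive scheme, the Thumb2
"modified immediate" encoding, where Val is the 12-bit i:imm3:imm8 field.
This sketch mirrors the architectural expansion and skips the UNPREDICTABLE
imm8 == 0 splat checks, so it is not necessarily the decoder's exact control
flow.)

static uint32_t sketchExpandT2SOImm(uint32_t Val) {
  uint32_t Imm8 = Val & 0xff;
  if (((Val >> 10) & 0x3) == 0) {            // splat directives
    switch ((Val >> 8) & 0x3) {
    case 0:  return Imm8;                                  // 0x000000XY
    case 1:  return (Imm8 << 16) | Imm8;                   // 0x00XY00XY
    case 2:  return (Imm8 << 24) | (Imm8 << 8);            // 0xXY00XY00
    default: return (Imm8 << 24) | (Imm8 << 16) | (Imm8 << 8) | Imm8;
    }
  }
  // Rotation: the 8-bit value 1bcdefgh rotated right by Val<11:7>, which is
  // at least 8 here since Val<11:10> is non-zero, so the shifts are defined.
  uint32_t Unrot = 0x80 | (Val & 0x7f);
  uint32_t Rot = (Val >> 7) & 0x1f;
  return (Unrot >> Rot) | (Unrot << (32 - Rot));
}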
@@ -4672,9 +4888,9 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
@@ -4683,7 +4899,7 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Val is passed in as S:J1:J2:imm10:imm11
// Note no trailing zero after imm11. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
@@ -4706,7 +4922,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val & ~0xf)
return MCDisassembler::Fail;
@@ -4715,7 +4932,8 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val & ~0xf)
return MCDisassembler::Fail;
@@ -4723,8 +4941,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
const FeatureBitset &FeatureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
@@ -4825,7 +5043,8 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned R = fieldFromInstruction(Val, 5, 1);
unsigned SysM = fieldFromInstruction(Val, 0, 5);
@@ -4840,7 +5059,8 @@ static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4862,7 +5082,7 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -4887,7 +5107,8 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4912,7 +5133,8 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4939,7 +5161,8 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4964,7 +5187,8 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4988,8 +5212,8 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5055,8 +5279,8 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5120,8 +5344,8 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5187,8 +5411,8 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5250,8 +5474,8 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5320,8 +5544,8 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5383,8 +5607,8 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5464,8 +5688,8 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5536,8 +5760,8 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -5562,8 +5786,8 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -5588,8 +5812,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 4, 4);
unsigned mask = fieldFromInstruction(Insn, 0, 4);
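(For orientation: pred is the IT block's base condition and mask encodes the
then/else pattern. Reading mask from bit 3 down, a bit equal to pred<0> marks
a "T" slot, a differing bit an "E" slot, and the lowest set bit terminates
the block. So, hedging on the exact convention, a bare IT carries
mask = 0b1000, while ITE EQ carries pred = 0b0000 (EQ) with mask = 0b1100:
bit 3 differs from pred<0>, giving the "E", and bit 2 terminates.)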
@@ -5617,9 +5841,9 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -5654,9 +5878,9 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -5689,8 +5913,8 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
if (sign1 != sign2) return MCDisassembler::Fail;
@@ -5717,7 +5941,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// Shift of "asr #32" is not allowed in Thumb2 mode.
@@ -5726,8 +5950,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
return S;
}
-static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5753,8 +5977,8 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
@@ -5812,8 +6036,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
@@ -5871,10 +6095,10 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0);
@@ -5904,8 +6128,8 @@ static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
return S;
}
-static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 16, 4);
@@ -5932,7 +6156,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned CRm = fieldFromInstruction(Val, 0, 4);
@@ -5978,7 +6203,7 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
@@ -6030,7 +6255,7 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0 && !zeroPermitted)
S = MCDisassembler::Fail;
@@ -6049,7 +6274,7 @@ static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
uint64_t LocImm = Inst.getOperand(0).getImm();
Val = LocImm + (2 << Val);
@@ -6061,7 +6286,7 @@ static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Val >= ARMCC::AL) // also exclude the non-condition NV
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Val));
@@ -6069,7 +6294,7 @@ static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::MVE_LCTP)
@@ -6132,7 +6357,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0)
@@ -6144,7 +6369,8 @@ static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if ((RegNo) + 1 > 11)
return MCDisassembler::Fail;
@@ -6154,7 +6380,8 @@ static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if ((RegNo) > 14)
return MCDisassembler::Fail;
@@ -6165,7 +6392,8 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo == 15) {
Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV));
return MCDisassembler::Success;
@@ -6181,7 +6409,7 @@ DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -6207,8 +6435,8 @@ static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -6224,7 +6452,7 @@ static const uint16_t QQPRDecoderTable[] = {
static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 6)
return MCDisassembler::Fail;
@@ -6240,7 +6468,7 @@ static const uint16_t QQQQPRDecoderTable[] = {
static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 4)
return MCDisassembler::Fail;
@@ -6251,7 +6479,7 @@ static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// Parse VPT mask and encode it in the MCInst as an immediate with the same
@@ -6281,7 +6509,8 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// The vpred_r operand type includes an MQPR register field derived
// from the encoding. But we don't actually want to add an operand
// to the MCInst at this stage, because AddThumbPredicate will do it
@@ -6292,18 +6521,16 @@ static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Code;
switch (Val & 0x3) {
case 0:
@@ -6323,17 +6550,16 @@ static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::HS : ARMCC::HI));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Code;
switch (Val) {
default:
@@ -6363,7 +6589,8 @@ static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Va
}
static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned DecodedVal = 64 - Val;
@@ -6404,10 +6631,10 @@ static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) {
}
}
-template<bool Writeback>
+template <bool Writeback>
static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
switch (Inst.getOpcode()) {
case ARM::VSTR_FPSCR_pre:
case ARM::VSTR_FPSCR_NZCVQC_pre:
@@ -6448,9 +6675,10 @@ static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
return S;
}
-static inline DecodeStatus DecodeMVE_MEM_pre(
- MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder,
- unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
+static inline DecodeStatus
+DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder, unsigned Rn,
+ OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = fieldFromInstruction(Val, 13, 3);
@@ -6469,7 +6697,8 @@ static inline DecodeStatus DecodeMVE_MEM_pre(
template <int shift>
static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 16, 3),
DecodetGPRRegisterClass,
@@ -6478,7 +6707,8 @@ static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
template <int shift>
static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 16, 4),
DecoderGPRRegisterClass,
@@ -6487,17 +6717,18 @@ static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
template <int shift>
static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 17, 3),
DecodeMQPRRegisterClass,
DecodeMveAddrModeQ<shift>);
}
-template<unsigned MinLog, unsigned MaxLog>
+template <unsigned MinLog, unsigned MaxLog>
static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val < MinLog || Val > MaxLog)
@@ -6507,10 +6738,10 @@ static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
return S;
}
-template<unsigned start>
-static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+template <unsigned start>
+static DecodeStatus
+DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createImm(start + Val));
@@ -6519,7 +6750,8 @@ static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 0, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -6542,7 +6774,8 @@ static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 0, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -6566,8 +6799,9 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMVEOverlappingLongShift(
- MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1;
@@ -6645,8 +6879,9 @@ static DecodeStatus DecodeMVEOverlappingLongShift(
return S;
}
-static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
fieldFromInstruction(Insn, 13, 3));
@@ -6664,9 +6899,9 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Addr
return S;
}
-template<bool scalar, OperandDecoder predicate_decoder>
+template <bool scalar, OperandDecoder predicate_decoder>
static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
unsigned Qn = fieldFromInstruction(Insn, 17, 3);
@@ -6703,7 +6938,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -6712,8 +6947,9 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
Inst.addOperand(MCOperand::createReg(ARM::VPR));
@@ -6721,7 +6957,8 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address
}
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const unsigned Rd = fieldFromInstruction(Insn, 8, 4);
const unsigned Rn = fieldFromInstruction(Insn, 16, 4);
const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 |
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 16bc0ca179a7..d74da27fbc4f 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -98,9 +98,20 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
- void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override;
+ void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override;
+ void emitARMWinCFISaveSP(unsigned Reg) override;
+ void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override;
+ void emitARMWinCFISaveLR(unsigned Offset) override;
+ void emitARMWinCFIPrologEnd(bool Fragment) override;
+ void emitARMWinCFINop(bool Wide) override;
+ void emitARMWinCFIEpilogStart(unsigned Condition) override;
+ void emitARMWinCFIEpilogEnd() override;
+ void emitARMWinCFICustom(unsigned Opcode) override;
+
public:
ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS,
MCInstPrinter &InstPrinter, bool VerboseAsm);
@@ -239,8 +250,8 @@ void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
void ARMTargetAsmStreamer::finishAttributeSection() {}
-void
-ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+void ARMTargetAsmStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName() << "\n";
}
@@ -269,6 +280,101 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
OS << '\n';
}
+void ARMTargetAsmStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {
+ if (Wide)
+ OS << "\t.seh_stackalloc_w\t" << Size << "\n";
+ else
+ OS << "\t.seh_stackalloc\t" << Size << "\n";
+}
+
+static void printRegs(formatted_raw_ostream &OS, ListSeparator &LS, int First,
+ int Last) {
+ if (First != Last)
+ OS << LS << "r" << First << "-r" << Last;
+ else
+ OS << LS << "r" << First;
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {
+ if (Wide)
+ OS << "\t.seh_save_regs_w\t";
+ else
+ OS << "\t.seh_save_regs\t";
+ ListSeparator LS;
+ int First = -1;
+ OS << "{";
+ for (int I = 0; I <= 12; I++) {
+ if (Mask & (1 << I)) {
+ if (First < 0)
+ First = I;
+ } else {
+ if (First >= 0) {
+ printRegs(OS, LS, First, I - 1);
+ First = -1;
+ }
+ }
+ }
+ if (First >= 0)
+ printRegs(OS, LS, First, 12);
+ if (Mask & (1 << 14))
+ OS << LS << "lr";
+ OS << "}\n";
+}
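(A worked example of the range printer above: a wide mask of 0x41f0, i.e.
bits for r4-r8 plus lr (bit 14), prints one collapsed range followed by lr:)

  .seh_save_regs_w  {r4-r8, lr}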
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveSP(unsigned Reg) {
+ OS << "\t.seh_save_sp\tr" << Reg << "\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveFRegs(unsigned First,
+ unsigned Last) {
+ if (First != Last)
+ OS << "\t.seh_save_fregs\t{d" << First << "-d" << Last << "}\n";
+ else
+ OS << "\t.seh_save_fregs\t{d" << First << "}\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveLR(unsigned Offset) {
+ OS << "\t.seh_save_lr\t" << Offset << "\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFIPrologEnd(bool Fragment) {
+ if (Fragment)
+ OS << "\t.seh_endprologue_fragment\n";
+ else
+ OS << "\t.seh_endprologue\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFINop(bool Wide) {
+ if (Wide)
+ OS << "\t.seh_nop_w\n";
+ else
+ OS << "\t.seh_nop\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {
+ if (Condition == ARMCC::AL)
+ OS << "\t.seh_startepilogue\n";
+ else
+ OS << "\t.seh_startepilogue_cond\t"
+ << ARMCondCodeToString(static_cast<ARMCC::CondCodes>(Condition)) << "\n";
+}
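(For instance, assuming ARMCondCodeToString's usual lowercase names, an
epilogue predicated on NE prints:)

  .seh_startepilogue_cond ne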
+
+void ARMTargetAsmStreamer::emitARMWinCFIEpilogEnd() {
+ OS << "\t.seh_endepilogue\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFICustom(unsigned Opcode) {
+ int I;
+ for (I = 3; I > 0; I--)
+ if (Opcode & (0xffu << (8 * I)))
+ break;
+ ListSeparator LS;
+ OS << "\t.seh_custom\t";
+ for (; I >= 0; I--)
+ OS << LS << ((Opcode >> (8 * I)) & 0xff);
+ OS << "\n";
+}
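(To make the loop above concrete: it finds the highest non-zero byte of
Opcode and prints that byte and everything below it in decimal, so
Opcode = 0x01c2 emits:)

  .seh_custom 1, 194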
+
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
StringRef CurrentVendor;
@@ -309,7 +415,7 @@ private:
void finishAttributeSection() override;
void emitLabel(MCSymbol *Symbol) override;
- void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
// Reset state between object emissions
@@ -984,8 +1090,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
Streamer.emitThumbFunc(Symbol);
}
-void
-ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+void ARMTargetELFStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *S) {
getStreamer().EmitFixup(S, FK_Data_4);
}
@@ -1057,7 +1163,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
- SwitchSection(EHSection);
+ switchSection(EHSection);
emitValueToAlignment(4, 0, 1, 0);
}
@@ -1150,7 +1256,7 @@ void ARMELFStreamer::emitFnEnd() {
}
// Switch to the section containing FnStart
- SwitchSection(&FnStart->getSection());
+ switchSection(&FnStart->getSection());
// Clean exception handling frame information
EHReset();
@@ -1369,12 +1475,8 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
return new ARMTargetStreamer(S);
}
-MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI) {
- const Triple &TT = STI.getTargetTriple();
- if (TT.isOSBinFormatELF())
- return new ARMTargetELFStreamer(S);
- return new ARMTargetStreamer(S);
+MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S) {
+ return new ARMTargetELFStreamer(S);
}
MCELFStreamer *createARMELFStreamer(MCContext &Context,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 77c0e3522911..febd8ab8bbc0 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
CommentString = "@";
@@ -110,7 +111,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
PrivateLabelPrefix = ".L";
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
+ ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
UseParensForSymbolVariant = true;
DwarfRegNumForCFI = false;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5ecacdab390f..c33bbfcc7114 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -2006,13 +2006,11 @@ getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
#include "ARMGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 17ca1866cf95..3f1379f135d1 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -87,18 +87,6 @@ static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
return false;
}
-static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
- std::string &Info) {
- if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
- MI.getOperand(1).getImm() != 8) {
- Info = "applying IT instruction to more than one subsequent instruction is "
- "deprecated";
- return true;
- }
-
- return false;
-}
-
static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 5c8f9bfdca08..e0c992f4fae2 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -71,13 +71,13 @@ MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
bool isVerboseAsm);
MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
+MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S);
+MCTargetStreamer *createARMObjectTargetWinCOFFStreamer(MCStreamer &S);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index ed4000c7e5be..0ea51839824b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
@@ -149,7 +149,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (FixupOffset & 0xff000000) {
Asm.getContext().reportError(Fixup.getLoc(),
"can not encode offset '0x" +
- to_hexString(FixupOffset) +
+ utohexstr(FixupOffset) +
"' in resulting scattered relocation.");
return;
}
@@ -264,7 +264,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
if (FixupOffset & 0xff000000) {
Asm.getContext().reportError(Fixup.getLoc(),
"can not encode offset '0x" +
- to_hexString(FixupOffset) +
+ utohexstr(FixupOffset) +
"' in resulting scattered relocation.");
return;
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 02a2d01176fc..16d1ae62053e 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -114,15 +114,28 @@ void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
-void
-ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
+void ARMTargetStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+void ARMTargetStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFISaveSP(unsigned Reg) {}
+void ARMTargetStreamer::emitARMWinCFISaveFRegs(unsigned First, unsigned Last) {}
+void ARMTargetStreamer::emitARMWinCFISaveLR(unsigned Offset) {}
+void ARMTargetStreamer::emitARMWinCFINop(bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFIPrologEnd(bool Fragment) {}
+void ARMTargetStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {}
+void ARMTargetStreamer::emitARMWinCFIEpilogEnd() {}
+void ARMTargetStreamer::emitARMWinCFICustom(unsigned Opcode) {}
+
static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
if (STI.getCPU() == "xscale")
return ARMBuildAttrs::v5TEJ;
- if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::HasV9_0aOps))
+ return ARMBuildAttrs::v9_A;
+ else if (STI.hasFeature(ARM::HasV8Ops)) {
if (STI.hasFeature(ARM::FeatureRClass))
return ARMBuildAttrs::v8_R;
return ARMBuildAttrs::v8_A;
@@ -305,3 +318,13 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI);
}
}
+
+MCTargetStreamer *
+llvm::createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return createARMObjectTargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return createARMObjectTargetWinCOFFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index e6f649164a29..cdd7f6fb715a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -8,30 +8,59 @@
#include "ARMMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
+ Win64EH::ARMUnwindEmitter EHStreamer;
+
public:
ARMWinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
std::unique_ptr<MCCodeEmitter> CE,
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
+ void emitWinEHHandlerData(SMLoc Loc) override;
+ void emitWindowsUnwindTables() override;
+ void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
+
void emitThumbFunc(MCSymbol *Symbol) override;
void finishImpl() override;
};
+void ARMWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::emitWinEHHandlerData(Loc);
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(),
+ /* HandlerData = */ true);
+}
+
+void ARMWinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+ EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
+}
+
+void ARMWinCOFFStreamer::emitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
+}
+
void ARMWinCOFFStreamer::emitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
void ARMWinCOFFStreamer::finishImpl() {
emitFrames(nullptr);
+ emitWindowsUnwindTables();
MCWinCOFFStreamer::finishImpl();
}
@@ -48,3 +77,201 @@ MCStreamer *llvm::createARMWinCOFFStreamer(
return S;
}
+namespace {
+class ARMTargetWinCOFFStreamer : public llvm::ARMTargetStreamer {
+private:
+ // True if we are processing SEH directives in an epilogue.
+ bool InEpilogCFI = false;
+
+ // Symbol of the current epilog for which we are processing SEH directives.
+ MCSymbol *CurrentEpilog = nullptr;
+
+public:
+ ARMTargetWinCOFFStreamer(llvm::MCStreamer &S) : ARMTargetStreamer(S) {}
+
+ // The unwind codes on ARM Windows are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
+ void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override;
+ void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override;
+ void emitARMWinCFISaveSP(unsigned Reg) override;
+ void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override;
+ void emitARMWinCFISaveLR(unsigned Offset) override;
+ void emitARMWinCFIPrologEnd(bool Fragment) override;
+ void emitARMWinCFINop(bool Wide) override;
+ void emitARMWinCFIEpilogStart(unsigned Condition) override;
+ void emitARMWinCFIEpilogEnd() override;
+ void emitARMWinCFICustom(unsigned Opcode) override;
+
+private:
+ void emitARMWinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
+};
+
+// Helper function to factor out the unwind-code setup shared by codes that
+// can belong to both the prolog and the epilog.
+void ARMTargetWinCOFFStreamer::emitARMWinUnwindCode(unsigned UnwindCode,
+ int Reg, int Offset) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+ MCSymbol *Label = S.emitCFILabel();
+ auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ if (InEpilogCFI)
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
+ else
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIAllocStack(unsigned Size,
+ bool Wide) {
+ unsigned Op = Win64EH::UOP_AllocSmall;
+ if (!Wide) {
+ if (Size / 4 > 0xffff)
+ Op = Win64EH::UOP_AllocHuge;
+ else if (Size / 4 > 0x7f)
+ Op = Win64EH::UOP_AllocLarge;
+ } else {
+ Op = Win64EH::UOP_WideAllocMedium;
+ if (Size / 4 > 0xffff)
+ Op = Win64EH::UOP_WideAllocHuge;
+ else if (Size / 4 > 0x3ff)
+ Op = Win64EH::UOP_WideAllocLarge;
+ }
+ emitARMWinUnwindCode(Op, -1, Size);
+}
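(Worked examples of the thresholds above, with sizes stored divided by 4 as
in the code: .seh_stackalloc 256 gives 256/4 = 64 <= 0x7f, so UOP_AllocSmall;
.seh_stackalloc 2048 gives 512 > 0x7f, so UOP_AllocLarge; and
.seh_stackalloc_w 512 gives 128 <= 0x3ff, so UOP_WideAllocMedium.)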
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveRegMask(unsigned Mask,
+ bool Wide) {
+ assert(Mask != 0);
+ int Lr = (Mask & 0x4000) ? 1 : 0;
+ Mask &= ~0x4000;
+ if (Wide)
+ assert((Mask & ~0x1fff) == 0);
+ else
+ assert((Mask & ~0x00ff) == 0);
+ if (Mask && ((Mask + (1 << 4)) & Mask) == 0) {
+ if (Wide && (Mask & 0x1000) == 0 && (Mask & 0xff) == 0xf0) {
+ // One continuous range from r4 to r8-r11
+ for (int I = 11; I >= 8; I--) {
+ if (Mask & (1 << I)) {
+ emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegsR4R11LR, I, Lr);
+ return;
+ }
+ }
+ // If the range actually was just r4-r7, fall through to the generic case below.
+ } else if (!Wide) {
+ // One continuous range from r4 to r4-r7
+ for (int I = 7; I >= 4; I--) {
+ if (Mask & (1 << I)) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveRegsR4R7LR, I, Lr);
+ return;
+ }
+ }
+ llvm_unreachable("logic error");
+ }
+ }
+ Mask |= Lr << 14;
+ if (Wide)
+ emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegMask, Mask, 0);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_SaveRegMask, Mask, 0);
+}
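(The (Mask + (1 << 4)) & Mask test above checks that the set bits form one
contiguous run starting at r4: for Mask = 0x00f0 (r4-r7) the add carries out
of the run and 0x0100 & 0x00f0 == 0, so a compact form applies; for
Mask = 0x00b0 (r4, r5, r7) the sum is 0x00c0 and 0x00c0 & 0x00b0 == 0x0080
is non-zero, so the generic UOP_SaveRegMask path is taken instead.)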
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveSP(unsigned Reg) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveSP, Reg, 0);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveFRegs(unsigned First,
+ unsigned Last) {
+ assert(First <= Last);
+ assert(First >= 16 || Last < 16);
+ assert(First <= 31 && Last <= 31);
+ if (First == 8)
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD8D15, Last, 0);
+ else if (First <= 15)
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD0D15, First, Last);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD16D31, First, Last);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveLR(unsigned Offset) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveLR, 0, Offset);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFINop(bool Wide) {
+ if (Wide)
+ emitARMWinUnwindCode(Win64EH::UOP_WideNop, -1, 0);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_Nop, -1, 0);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIPrologEnd(bool Fragment) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ MCSymbol *Label = S.emitCFILabel();
+ CurFrame->PrologEnd = Label;
+ WinEH::Instruction Inst =
+ WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0);
+ auto it = CurFrame->Instructions.begin();
+ CurFrame->Instructions.insert(it, Inst);
+ CurFrame->Fragment = Fragment;
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = true;
+ CurrentEpilog = S.emitCFILabel();
+ CurFrame->EpilogMap[CurrentEpilog].Condition = Condition;
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ if (!CurrentEpilog) {
+ S.getContext().reportError(SMLoc(), "Stray .seh_endepilogue in " +
+ CurFrame->Function->getName());
+ return;
+ }
+
+ std::vector<WinEH::Instruction> &Epilog =
+ CurFrame->EpilogMap[CurrentEpilog].Instructions;
+
+ unsigned UnwindCode = Win64EH::UOP_End;
+ if (!Epilog.empty()) {
+ WinEH::Instruction EndInstr = Epilog.back();
+ if (EndInstr.Operation == Win64EH::UOP_Nop) {
+ UnwindCode = Win64EH::UOP_EndNop;
+ Epilog.pop_back();
+ } else if (EndInstr.Operation == Win64EH::UOP_WideNop) {
+ UnwindCode = Win64EH::UOP_WideEndNop;
+ Epilog.pop_back();
+ }
+ }
+
+ InEpilogCFI = false;
+ WinEH::Instruction Inst = WinEH::Instruction(UnwindCode, nullptr, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
+ MCSymbol *Label = S.emitCFILabel();
+ CurFrame->EpilogMap[CurrentEpilog].End = Label;
+ CurrentEpilog = nullptr;
+}
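(The pop_back folding above uses the combined terminators from the Windows
ARM unwind encoding: hedging on the exact opcode values, an epilogue ending
in a 16-bit nop collapses from [..., UOP_Nop, UOP_End] to [..., UOP_EndNop],
and likewise for the wide variant, saving one unwind code.)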
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFICustom(unsigned Opcode) {
+ emitARMWinUnwindCode(Win64EH::UOP_Custom, 0, Opcode);
+}
+
+} // end anonymous namespace
+
+MCTargetStreamer *llvm::createARMObjectTargetWinCOFFStreamer(MCStreamer &S) {
+ return new ARMTargetWinCOFFStreamer(S);
+}
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index cfd275bc0621..30785340ef12 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -145,7 +145,8 @@ private:
// Optimise the base and offsets of the given address
bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
// Try to fold consecutive geps together into one
- Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
+ Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale,
+ IRBuilder<> &Builder);
// Check whether these offsets could be moved out of the loop they're in
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
@@ -390,7 +391,7 @@ MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
return ReturnFalse;
// Check that the constant is small enough for an incrementing gather
- int64_t Immediate = Const.getValue() << TypeScale;
+ int64_t Immediate = *Const << TypeScale;
if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0)
return ReturnFalse;
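(A quick arithmetic check, assuming TypeScale is log2 of the element size in
bytes: for 32-bit gather elements TypeScale is 2, so a constant step of 16
elements gives Immediate = 16 << 2 = 64 bytes, which is within +/-512 and a
multiple of 4 and therefore acceptable.)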
@@ -964,7 +965,7 @@ static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) {
bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
LoopInfo *LI) {
- LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n"
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize: "
<< *Offsets << "\n");
// Optimise the addresses of gathers/scatters by moving invariant
// calculations out of the loop
@@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
return true;
}
-static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
- IRBuilder<> &Builder) {
+static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y,
+ unsigned ScaleY, IRBuilder<> &Builder) {
// Splat the non-vector value to a vector of the given type - if the value is
// a constant (and its value isn't too big), we can even use this opportunity
// to scale it to the size of the vector elements
@@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
ConstantInt *ConstYEl =
dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
if (!ConstXEl || !ConstYEl ||
- ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+ ConstXEl->getZExtValue() * ScaleX +
+ ConstYEl->getZExtValue() * ScaleY >=
(unsigned)(1 << (TargetElemSize - 1)))
return nullptr;
}
}
- Value *Add = Builder.CreateAdd(X, Y);
+ Value *XScale = Builder.CreateVectorSplat(
+ XElType->getNumElements(),
+ Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX));
+ Value *YScale = Builder.CreateVectorSplat(
+ YElType->getNumElements(),
+ Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY));
+ Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale),
+ Builder.CreateMul(Y, YScale));
- FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
- if (checkOffsetSize(Add, GEPType->getNumElements()))
+ if (checkOffsetSize(Add, XElType->getNumElements()))
return Add;
else
return nullptr;
}
Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
- Value *&Offsets,
+ Value *&Offsets, unsigned &Scale,
IRBuilder<> &Builder) {
Value *GEPPtr = GEP->getPointerOperand();
Offsets = GEP->getOperand(1);
+ Scale = DL->getTypeAllocSize(GEP->getSourceElementType());
// We only merge geps with constant offsets, because only for those
// we can make sure that we do not cause an overflow
- if (!isa<Constant>(Offsets))
+ if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets))
return nullptr;
- GetElementPtrInst *BaseGEP;
- if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+ if (GetElementPtrInst *BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr)) {
// Merge the two geps into one
- Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+ Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder);
if (!BaseBasePtr)
return nullptr;
- Offsets =
- CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+ Offsets = CheckAndCreateOffsetAdd(
+ Offsets, Scale, GEP->getOperand(1),
+ DL->getTypeAllocSize(GEP->getSourceElementType()), Builder);
if (Offsets == nullptr)
return nullptr;
+ Scale = 1; // Scale is always an i8 at this point.
return BaseBasePtr;
}
return GEPPtr;
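(An illustrative fold, hedged and with types abbreviated: given
  %a = getelementptr i32, ptr %base, <4 x i32> %off1
  %b = getelementptr i16, ptr %a,    <4 x i32> %off2
the recursion returns %base with the combined byte offset
%off1 * 4 + %off2 * 2, built by CheckAndCreateOffsetAdd via splat-multiplies,
and the caller then emits a single i8-typed gep, which is why Scale is 1
after a successful merge.)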
@@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
Builder.SetInsertPoint(GEP);
Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
Value *Offsets;
- Value *Base = foldGEP(GEP, Offsets, Builder);
+ unsigned Scale;
+ Value *Base = foldGEP(GEP, Offsets, Scale, Builder);
// We only want to merge the geps if there is a real chance that they can be
// used by an MVE gather; thus the offset has to have the correct size
// (always i32 if it is not of vector type) and the base has to be a
// pointer.
if (Offsets && Base && Base != GEP) {
+ assert(Scale == 1 && "Expected to fold GEP to a scale of 1");
+ Type *BaseTy = Builder.getInt8PtrTy();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
+ BaseTy = FixedVectorType::get(BaseTy, VecTy);
GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
- GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP);
- GEP->replaceAllUsesWith(NewAddress);
+ Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets,
+ "gep.merged", GEP);
+ LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP
+ << "\n new : " << *NewAddress << "\n");
+ GEP->replaceAllUsesWith(
+ Builder.CreateBitCast(NewAddress, GEP->getType()));
GEP = NewAddress;
Changed = true;
}
diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
index 538bd10685b0..3e76efb5133f 100644
--- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
+++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
@@ -45,6 +45,7 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -176,9 +177,8 @@ static bool tryInterleave(Instruction *Start,
// Truncs
case Instruction::Trunc:
case Instruction::FPTrunc:
- if (Truncs.count(I))
+ if (!Truncs.insert(I))
continue;
- Truncs.insert(I);
Visited.insert(I);
break;
@@ -235,9 +235,8 @@ static bool tryInterleave(Instruction *Start,
case Instruction::FAdd:
case Instruction::FMul:
case Instruction::Select:
- if (Ops.count(I))
+ if (!Ops.insert(I))
continue;
- Ops.insert(I);
for (Use &Op : I->operands()) {
if (!isa<FixedVectorType>(Op->getType()))
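
Both hunks above lean on SetVector::insert returning false for an element that is already present, collapsing the old count()-then-insert() pair into one call. The same pattern with the standard library:

    #include <cassert>
    #include <set>

    int main() {
      std::set<int> Visited;
      int Work[] = {1, 2, 1, 3, 2};
      int Unique = 0;
      for (int I : Work) {
        // insert().second is false for duplicates, mirroring
        // SetVector::insert's bool result used in the hunks above.
        if (!Visited.insert(I).second)
          continue;
        ++Unique;
      }
      assert(Unique == 3);
      return 0;
    }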
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 7e31ea77f4f5..6bad9d61238e 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -404,6 +404,17 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
LoopPhi->getOperand(3).setReg(DecReg);
}
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
+    // If the LoopEnd falls through, we need to insert a t2B to the fall-through
+ // block so that the non-analyzable t2LoopEndDec doesn't fall through.
+ MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
+ BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(&*MBBI)
+ .add(predOps(ARMCC::AL));
+ }
+
// Replace the loop dec and loop end as a single instruction.
MachineInstrBuilder MI =
BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
@@ -1041,8 +1052,7 @@ bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
}
bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2() || !STI.hasLOB())
return false;
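
A sketch of the contract being used: with an analyzable branch, a null FBB means the block falls through to its layout successor, and the pass must then materialize an explicit t2B because the merged t2LoopEndDec cannot be analyzed for fallthrough. The struct below is an illustrative model, not MachineBasicBlock:

    #include <cassert>
    #include <cstdio>

    // Illustrative model of the analyzeBranch result: TakenTarget plays TBB,
    // FalseTarget plays FBB; a null FalseTarget means fallthrough.
    struct BlockModel {
      const char *TakenTarget;
      const char *FalseTarget;
    };

    static bool needsExplicitBranch(const BlockModel &B) {
      return B.FalseTarget == nullptr;
    }

    int main() {
      BlockModel LoopEndBlock{"loop.header", nullptr};
      if (needsExplicitBranch(LoopEndBlock))
        std::printf("insert: t2B to the layout successor\n");
      assert(needsExplicitBranch(LoopEndBlock));
      return 0;
    }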
diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index c7f451cba14f..d6d43b9143d6 100644
--- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -312,8 +312,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
}
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
return false;
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 71a82a1e3271..df64710712cc 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
if (ArgRegsSaveSize) {
@@ -205,26 +205,38 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
+ bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr);
+
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
switch (Reg) {
+ case ARM::R11:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R8:
case ARM::R9:
case ARM::R10:
- case ARM::R11:
if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
LLVM_FALLTHROUGH;
+ case ARM::LR:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
default:
@@ -232,18 +244,53 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
}
+ MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push;
+ if (HasFrameRecordArea) {
+ // Skip Frame Record setup:
+ // push {lr}
+ // mov lr, r11
+ // push {lr}
+ std::advance(MBBI, 2);
+ FRPush = MBBI++;
+ }
+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ GPRCS1Push = MBBI;
++MBBI;
}
+ // Find last push instruction for GPRCS2 - spilling of high registers
+ // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
+ while (true) {
+ MachineBasicBlock::iterator OldMBBI = MBBI;
+ // Skip a run of tMOVr instructions
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
+ MBBI->getFlag(MachineInstr::FrameSetup))
+ MBBI++;
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH &&
+ MBBI->getFlag(MachineInstr::FrameSetup)) {
+ GPRCS2Push = MBBI;
+ MBBI++;
+ } else {
+ // We have reached an instruction which is not a push, so the previous
+ // run of tMOVr instructions (which may have been empty) was not part of
+ // the prologue. Reset MBBI back to the last PUSH of the prologue.
+ MBBI = OldMBBI;
+ break;
+ }
+ }
+
// Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
+ (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
bool HasFP = hasFP(MF);
if (HasFP)
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ if (HasFrameRecordArea)
+ AFI->setFrameRecordSavedAreaSize(FRSize);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
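
The offset computation above carves the callee-save areas from the frame top downwards. A self-contained model of the same arithmetic (variable names mirror emitPrologue; the sample sizes are made up):

    #include <cassert>

    // Areas from the top of the frame down: frame record, GPR area 1,
    // GPR area 2, then the DPR area.
    struct SpillOffsets {
      unsigned DPRCSOffset, GPRCS2Offset, GPRCS1Offset;
    };

    static SpillOffsets layoutSpillAreas(unsigned NumBytes,
                                         unsigned ArgRegsSaveSize,
                                         unsigned FRSize, unsigned GPRCS1Size,
                                         unsigned GPRCS2Size,
                                         unsigned DPRCSSize) {
      SpillOffsets O;
      O.DPRCSOffset = NumBytes - ArgRegsSaveSize -
                      (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
      O.GPRCS2Offset = O.DPRCSOffset + DPRCSSize;
      O.GPRCS1Offset = O.GPRCS2Offset + GPRCS2Size;
      return O;
    }

    int main() {
      // 64-byte frame, 8-byte r11+lr frame record, 12 bytes of low-reg saves.
      SpillOffsets O = layoutSpillAreas(64, 0, 8, 12, 0, 0);
      assert(O.DPRCSOffset == 44 && O.GPRCS2Offset == 44 &&
             O.GPRCS1Offset == 44);
      return 0;
    }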
@@ -252,71 +299,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
- tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
}
-
- if (adjustedGPRCS1Size) {
- CFAOffset += adjustedGPRCS1Size;
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF))
- break;
- LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- }
+ CFAOffset += adjustedGPRCS1Size;
   // Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock +=
- MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP)
- .addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL));
+ MachineBasicBlock::iterator AfterPush =
+ HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push);
+ if (HasFrameRecordArea) {
+ // We have just finished pushing the previous FP into the stack,
+ // so simply capture the SP value as the new Frame Pointer.
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr)
+ .addReg(ARM::SP)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ } else {
+ FramePtrOffsetInBlock +=
+ MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
+
if(FramePtrOffsetInBlock) {
- CFAOffset -= FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
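
The branch above picks between two CFI forms. A compact model of the decision, using r7 purely as an example frame pointer:

    #include <cstdio>

    // If the new frame pointer sits below the CFA (FramePtrOffsetInBlock != 0),
    // both the CFA register and offset change; otherwise only the register.
    static void emitFpCfi(unsigned CFAOffset, unsigned FramePtrOffsetInBlock) {
      if (FramePtrOffsetInBlock)
        std::printf(".cfi_def_cfa r7, %u\n",
                    CFAOffset - FramePtrOffsetInBlock);
      else
        std::printf(".cfi_def_cfa_register r7\n");
    }

    int main() {
      emitFpCfi(16, 8); // fp = sp + 8, so CFA = fp + 8
      emitFpCfi(16, 0); // fp = sp, CFA offset is unchanged
      return 0;
    }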
@@ -326,45 +347,69 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
- // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
- // and tMOVr instructions. We don't need to add any call frame information
- // in-between these instructions, because they do not modify the high
- // registers.
- while (true) {
- MachineBasicBlock::iterator OldMBBI = MBBI;
- // Skip a run of tMOVr instructions
- while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
- MBBI++;
- if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
- MBBI++;
- } else {
- // We have reached an instruction which is not a push, so the previous
- // run of tMOVr instructions (which may have been empty) was not part of
- // the prologue. Reset MBBI back to the last PUSH of the prologue.
- MBBI = OldMBBI;
- break;
+ // Emit call frame information for the callee-saved low registers.
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ if (adjustedGPRCS1Size) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.splitFramePushPop(MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
}
}
// Emit call frame information for the callee-saved high registers.
- for (auto &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12: {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- default:
- break;
+ if (GPRCS2Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+ for (auto &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12: {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ default:
+ break;
+ }
}
}
@@ -453,21 +498,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
-static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
- if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() &&
- isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
- return true;
- else if (MI.getOpcode() == ARM::tPOP) {
- return true;
- } else if (MI.getOpcode() == ARM::tMOVr) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
- ARM::hGPRRegClass.contains(Dst));
- }
- return false;
-}
-
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
@@ -483,26 +513,26 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI.getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Register FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
NumBytes - ArgRegsSaveSize, ARM::NoRegister,
- MachineInstr::NoFlags);
+ MachineInstr::FrameDestroy);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
if (MBBI != MBB.begin()) {
do
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
- if (!isCSRestore(*MBBI, CSRegs))
+ while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy));
+ if (!MBBI->getFlag(MachineInstr::FrameDestroy))
++MBBI;
}
// Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
+ AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
@@ -516,14 +546,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
- TII, *RegInfo);
+ TII, *RegInfo, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(ARM::R4)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
} else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(FramePtr)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
} else {
// For a large stack frame, we might need a scratch register to store
// the size of the frame. We know all callee-save registers are free
@@ -542,10 +574,10 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
- ScratchRegister, MachineInstr::NoFlags);
+ ScratchRegister, MachineInstr::FrameDestroy);
} else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
- ScratchRegister, MachineInstr::NoFlags);
+ ScratchRegister, MachineInstr::FrameDestroy);
}
}
@@ -637,7 +669,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
return true;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
@@ -725,18 +758,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
.addReg(PopReg, RegState::Define)
.addReg(ARM::SP)
.addImm(MBBI->getNumExplicitOperands() - 2)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Move from the temporary register to the LR.
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Advance past the pop instruction.
MBBI++;
// Increment the SP.
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
ArgRegsSaveSize + 4, ARM::NoRegister,
- MachineInstr::NoFlags);
+ MachineInstr::FrameDestroy);
return true;
}
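
A hedged sketch of the fix-up sequence emitted on this vararg path, with illustrative registers and slot offsets (the real code derives the tLDRspi slot from the pop instruction's operand count):

    #include <cstdio>

    int main() {
      unsigned ArgRegsSaveSize = 8;
      std::printf("ldr r3, [sp, #0]\n");   // reload the saved LR value
      std::printf("mov lr, r3\n");         // restore LR through the scratch reg
      std::printf("add sp, sp, #%u\n",     // free the LR slot plus the
                  ArgRegsSaveSize + 4);    // vararg save area
      return 0;
    }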
@@ -746,7 +781,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(TemporaryReg, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
@@ -754,7 +790,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
@@ -769,90 +806,82 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Erase the old instruction.
MBB.erase(MBBI);
MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
assert(PopReg && "Do not know how to get LR");
BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
.add(predOps(ARMCC::AL))
- .addReg(PopReg, RegState::Define);
+ .addReg(PopReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
- ARM::NoRegister, MachineInstr::NoFlags);
+ ARM::NoRegister, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
if (TemporaryReg)
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(PopReg, RegState::Define)
.addReg(TemporaryReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
return true;
}
-using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
-
-// Return the first iterator after CurrentReg which is present in EnabledRegs,
-// or OrderEnd if no further registers are in that set. This does not advance
-// the iterator first, so returns CurrentReg if it is in EnabledRegs.
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
- const ARMRegSet &EnabledRegs,
- const unsigned *OrderEnd) {
- while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
- ++CurrentReg;
- return CurrentReg;
-}
-
-bool Thumb1FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineFunction &MF = *MBB.getParent();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- ARMRegSet LoRegsToSave; // r0-r7, lr
- ARMRegSet HiRegsToSave; // r8-r11
- ARMRegSet CopyRegs; // Registers which can be used after pushing
- // LoRegs for saving HiRegs.
-
- for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+ ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+ std::set<Register> &LowRegs,
+ std::set<Register> &HighRegs) {
+ for (Register Reg : Regs) {
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToSave[Reg] = true;
+ LowRegs.insert(Reg);
} else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToSave[Reg] = true;
+ HighRegs.insert(Reg);
} else {
llvm_unreachable("callee-saved register of unexpected class");
}
-
- if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
- !MF.getRegInfo().isLiveIn(Reg) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
}
+}
- // Unused argument registers can be used for the high register saving.
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
- if (!MF.getRegInfo().isLiveIn(ArgReg))
- CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+ const std::set<Register> &RegSet) {
+ return std::find_if(OrderedStartIt, OrderedEndIt,
+ [&](Register Reg) { return RegSet.count(Reg); });
+}
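
getNextOrderedReg is a thin wrapper over std::find_if: walk a fixed priority order and stop at the first register that is also wanted. A standard-library analogue with integer stand-ins for ARM::R8..R11:

    #include <algorithm>
    #include <array>
    #include <cassert>
    #include <set>

    template <typename It>
    It nextOrderedReg(It First, It Last, const std::set<int> &Wanted) {
      return std::find_if(First, Last,
                          [&](int R) { return Wanted.count(R); });
    }

    int main() {
      std::array<int, 4> Order = {8, 9, 10, 11}; // stand-ins for r8..r11
      std::set<int> Wanted = {10, 11};
      auto It = nextOrderedReg(Order.begin(), Order.end(), Wanted);
      assert(It != Order.end() && *It == 10);
      return 0;
    }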
- // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToSave,
+ const std::set<Register> &CopyRegs) {
+ MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!LoRegsToSave.none()) {
+ DebugLoc DL;
+
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+ // Push low regs first
+ if (!LowRegs.empty()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
- if (LoRegsToSave[Reg]) {
+ for (unsigned Reg : OrderedLowRegs) {
+ if (LowRegs.count(Reg)) {
bool isKill = !MRI.isLiveIn(Reg);
if (isKill && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
@@ -863,31 +892,26 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
MIB.setMIFlags(MachineInstr::FrameSetup);
}
- // Push the high registers. There are no store instructions that can access
- // these registers directly, so we have to move them to low registers, and
- // push them. This might take multiple pushes, as it is possible for there to
+ // Now push the high registers
+ // There are no store instructions that can access high registers directly,
+ // so we have to move them to low registers, and push them.
+ // This might take multiple pushes, as it is possible for there to
// be fewer low registers available than high registers which need saving.
- // These are in reverse order so that in the case where we need to use
+ // Find the first register to save.
+ // Registers must be processed in reverse order so that in case we need to use
// multiple PUSH instructions, the order of the registers on the stack still
   // matches the unwind info. They need to be switched back to ascending order
// before adding to the PUSH instruction.
- static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
- ARM::R5, ARM::R4, ARM::R3,
- ARM::R2, ARM::R1, ARM::R0};
- static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
+ auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(),
+ OrderedHighRegs.rend(),
+ HighRegs);
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
-
- // Find the first register to save.
- const unsigned *HiRegToSave = findNextOrderedReg(
- std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
-
- while (HiRegToSave != AllHighRegsEnd) {
+ while (HiRegToSave != OrderedHighRegs.rend()) {
// Find the first low register to use.
- const unsigned *CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
@@ -895,25 +919,29 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
.setMIFlags(MachineInstr::FrameSetup);
SmallVector<unsigned, 4> RegsToPush;
- while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
- if (HiRegsToSave[*HiRegToSave]) {
+ while (HiRegToSave != OrderedHighRegs.rend() &&
+ CopyRegIt != OrderedCopyRegs.rend()) {
+ if (HighRegs.count(*HiRegToSave)) {
bool isKill = !MRI.isLiveIn(*HiRegToSave);
if (isKill && !MRI.isReserved(*HiRegToSave))
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
- .addReg(*CopyReg, RegState::Define)
+ .addReg(*CopyRegIt, RegState::Define)
.addReg(*HiRegToSave, getKillRegState(isKill))
.add(predOps(ARMCC::AL))
.setMIFlags(MachineInstr::FrameSetup);
// Record the register that must be added to the PUSH.
- RegsToPush.push_back(*CopyReg);
-
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToSave =
- findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+ RegsToPush.push_back(*CopyRegIt);
+
+ CopyRegIt = getNextOrderedReg(std::next(CopyRegIt),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
+ HiRegToSave = getNextOrderedReg(std::next(HiRegToSave),
+ OrderedHighRegs.rend(),
+ HighRegs);
}
}
@@ -924,84 +952,63 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
// Insert the PUSH instruction after the MOVs.
MBB.insert(MI, PushMIB);
}
-
- return true;
}
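
The high-register push loop above pairs high registers with free low copy registers in descending order, emitting the MOVs for one batch and then a single PUSH whose register list is flipped back to ascending order. A standalone model of the batching (two free copy regs, four high regs to save):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> HighDesc = {11, 10, 9, 8}; // r11..r8, reverse order
      std::vector<int> CopyDesc = {7, 6};         // free low regs, reverse

      size_t H = 0;
      while (H < HighDesc.size()) {
        std::vector<int> Batch;
        for (size_t C = 0;
             C < CopyDesc.size() && H < HighDesc.size(); ++C, ++H) {
          std::printf("mov r%d, r%d\n", CopyDesc[C], HighDesc[H]);
          Batch.push_back(CopyDesc[C]);
        }
        std::printf("push {");
        for (size_t I = Batch.size(); I-- > 0;) // back to ascending order
          std::printf("r%d%s", Batch[I], I ? ", " : "");
        std::printf("}\n");
      }
      return 0;
    }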
-bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
+static void popRegsFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToRestore,
+ const std::set<Register> &AvailableCopyRegs,
+ bool IsVarArg, bool HasV5Ops) {
+ if (RegsToRestore.empty())
+ return;
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
- ARMRegSet LoRegsToRestore;
- ARMRegSet HiRegsToRestore;
- // Low registers (r0-r7) which can be used to restore the high registers.
- ARMRegSet CopyRegs;
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
- for (CalleeSavedInfo I : CSI) {
- Register Reg = I.getReg();
-
- if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToRestore[Reg] = true;
- } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToRestore[Reg] = true;
- } else {
- llvm_unreachable("callee-saved register of unexpected class");
- }
-
- // If this is a low register not used as the frame pointer, we may want to
- // use it for restoring the high registers.
- if ((ARM::tGPRRegClass.contains(Reg)) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
- }
-
- // If this is a return block, we may be able to use some unused return value
- // registers for restoring the high regs.
- auto Terminator = MBB.getFirstTerminator();
- if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
- CopyRegs[ARM::R0] = true;
- CopyRegs[ARM::R1] = true;
- CopyRegs[ARM::R2] = true;
- CopyRegs[ARM::R3] = true;
- for (auto Op : Terminator->implicit_operands()) {
- if (Op.isReg())
- CopyRegs[Op.getReg()] = false;
- }
- }
-
- static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7};
- static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
-
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+  // Pop the high registers first.
+  // There are no load instructions that can access high registers directly,
+  // so we have to pop into low registers and then move to the high registers.
+ // This might take multiple pops, as it is possible for there to
+ // be fewer low registers available than high registers which need restoring.
// Find the first register to restore.
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
- HiRegsToRestore, AllHighRegsEnd);
+ auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
+ OrderedHighRegs.end(),
+ HighRegs);
+
+ std::set<Register> CopyRegs = AvailableCopyRegs;
+ Register LowScratchReg;
+ if (!HighRegs.empty() && CopyRegs.empty()) {
+ // No copy regs are available to pop high regs. Let's make use of a return
+ // register and the scratch register (IP/R12) to copy things around.
+ LowScratchReg = ARM::R0;
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(LowScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ CopyRegs.insert(LowScratchReg);
+ }
- while (HiRegToRestore != AllHighRegsEnd) {
- assert(!CopyRegs.none());
+ while (HiRegToRestore != OrderedHighRegs.end()) {
+ assert(!CopyRegs.empty());
// Find the first low register to use.
- auto CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
+ OrderedCopyRegs.end(),
+ CopyRegs);
// Create the POP instruction.
- MachineInstrBuilder PopMIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
- while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ while (HiRegToRestore != OrderedHighRegs.end() &&
+ CopyReg != OrderedCopyRegs.end()) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
@@ -1009,64 +1016,189 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
.addReg(*HiRegToRestore, RegState::Define)
.addReg(*CopyReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
-
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToRestore =
- findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ CopyReg = getNextOrderedReg(std::next(CopyReg),
+ OrderedCopyRegs.end(),
+ CopyRegs);
+ HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore),
+ OrderedHighRegs.end(),
+ HighRegs);
}
}
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
-
- bool NeedsPop = false;
- for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
- Register Reg = Info.getReg();
-
- // High registers (excluding lr) have already been dealt with
- if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
- continue;
-
- if (Reg == ARM::LR) {
- Info.setRestored(false);
- if (!MBB.succ_empty() ||
- MI->getOpcode() == ARM::TCRETURNdi ||
- MI->getOpcode() == ARM::TCRETURNri)
- // LR may only be popped into PC, as part of return sequence.
- // If this isn't the return sequence, we'll need emitPopSpecialFixUp
- // to restore LR the hard way.
- // FIXME: if we don't pass any stack arguments it would be actually
- // advantageous *and* correct to do the conversion to an ordinary call
- // instruction here.
- continue;
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (!STI.hasV5TOps())
- continue;
+ // Restore low register used as scratch if necessary
+ if (LowScratchReg.isValid()) {
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(LowScratchReg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
- // CMSE entry functions must return via BXNS, see emitEpilogue.
- if (AFI->isCmseNSEntryFunction())
+ // Now pop the low registers
+ if (!LowRegs.empty()) {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ bool NeedsPop = false;
+ for (Register Reg : OrderedLowRegs) {
+ if (!LowRegs.count(Reg))
continue;
- // Pop LR into PC.
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- if (MI != MBB.end())
- MIB.copyImplicitOps(*MI);
- MI = MBB.erase(MI);
+ if (Reg == ARM::LR) {
+ if (!MBB.succ_empty() ||
+ MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri)
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
+ // FIXME: if we don't pass any stack arguments it would be actually
+ // advantageous *and* correct to do the conversion to an ordinary call
+ // instruction here.
+ continue;
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (IsVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!HasV5Ops)
+ continue;
+
+ // CMSE entry functions must return via BXNS, see emitEpilogue.
+ if (AFI->isCmseNSEntryFunction())
+ continue;
+
+ // Pop LR into PC.
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(*MI);
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NeedsPop = true;
}
- MIB.addReg(Reg, getDefRegState(true));
- NeedsPop = true;
+
+ // It's illegal to emit pop instruction without operands.
+ if (NeedsPop)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.deleteMachineInstr(MIB);
+ }
+}
+
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate push sequence to generate
+ // a correct Frame Record
+ bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
}
- // It's illegal to emit pop instruction without operands.
- if (NeedsPop)
- MBB.insert(MI, &*MIB);
- else
- MF.deleteMachineInstr(MIB);
+ pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
+
+ // Determine intermediate registers which can be used for pushing high regs:
+ // - Spilled low regs
+ // - Unused argument registers
+ std::set<Register> CopyRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+ !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ if (!MF.getRegInfo().isLiveIn(ArgReg))
+ CopyRegs.insert(ArgReg);
+
+ pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
+
+ return true;
+}
+
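
When the frame pointer lives in a high register, the frame record is produced by the dedicated push sequence documented in emitPrologue. A hedged sketch of the resulting instructions, assuming r11 is the frame pointer:

    #include <cstdio>

    int main() {
      std::printf("push {lr}\n");   // save the return address
      std::printf("mov lr, r11\n"); // old FP via LR, the only pushable spare
      std::printf("push {lr}\n");   // save the old frame pointer below LR
      std::printf("mov r11, sp\n"); // new FP points at the frame record
      return 0;
    }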
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate pop sequence to generate
+ // a correct Frame Record
+ bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
+
+ if (Reg == ARM::LR)
+ I.setRestored(false);
+ }
+
+  // Determine intermediate registers which can be used for popping high regs:
+ // - Spilled low regs
+ // - Unused return registers
+ std::set<Register> CopyRegs;
+ std::set<Register> UnusedReturnRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ auto Terminator = MBB.getFirstTerminator();
+ if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+ UnusedReturnRegs.insert(ARM::R0);
+ UnusedReturnRegs.insert(ARM::R1);
+ UnusedReturnRegs.insert(ARM::R2);
+ UnusedReturnRegs.insert(ARM::R3);
+ for (auto Op : Terminator->implicit_operands()) {
+ if (Op.isReg())
+ UnusedReturnRegs.erase(Op.getReg());
+ }
+ }
+ CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end());
+
+ // First pop regular spilled regs.
+ popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg,
+ STI.hasV5TOps());
+
+ // LR may only be popped into pc, as part of a return sequence.
+ // Check that no other pop instructions are inserted after that.
+ assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) &&
+ "Can't insert pop after return sequence");
+
+ // Now pop Frame Record regs.
+ // Only unused return registers can be used as copy regs at this point.
+ popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg,
+ STI.hasV5TOps());
return true;
}
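
A small model of the copy-register selection used by this epilogue path: spilled low registers (except the frame pointer) plus whichever of r0-r3 the tBX_RET terminator leaves unused. Integer register numbers are stand-ins:

    #include <cassert>
    #include <set>

    int main() {
      std::set<int> SpilledLow = {4, 5, 7}; // r4, r5, r7
      int FPReg = 7;                        // r7 is the frame pointer
      std::set<int> LiveAtReturn = {0};     // r0 carries the return value

      std::set<int> CopyRegs;
      for (int R : SpilledLow)
        if (R != FPReg)
          CopyRegs.insert(R);
      for (int R : {0, 1, 2, 3})            // unused return registers
        if (!LiveAtReturn.count(R))
          CopyRegs.insert(R);

      assert(CopyRegs == std::set<int>({1, 2, 3, 4, 5}));
      return 0;
    }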
diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 5cdaa7f02201..155555152ced 100644
--- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -226,9 +226,10 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // IT blocks are limited to one conditional op if -arm-restrict-it
// is set: skip the loop
if (!restrictIT) {
+    LLVM_DEBUG(dbgs() << "Allowing complex IT block\n");
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
@@ -283,8 +284,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
}
bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2())
return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index ebd139af2219..60dbc7b92013 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -555,7 +555,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(FrameRegIdx+1);
+ do MI.removeOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
MIB.add(predOps(ARMCC::AL));
@@ -592,7 +592,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
// Remove the cc_out operand.
if (HasCCOut)
- MI.RemoveOperand(MI.getNumOperands()-1);
+ MI.removeOperand(MI.getNumOperands()-1);
Offset = 0;
return true;
}
@@ -626,7 +626,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
- MI.RemoveOperand(FrameRegIdx+1);
+ MI.removeOperand(FrameRegIdx+1);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
NewOpc = immediateOffsetOpcode(Opcode);
AddrMode = ARMII::AddrModeT2_i12;
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 1cc5422523f1..7ae4b19afb60 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -205,11 +206,11 @@ namespace {
bool IsSelfLoop);
/// ReduceMI - Attempt to reduce MI, return true on success.
- bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, bool IsSelfLoop);
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
+ bool IsSelfLoop, bool SkipPrologueEpilogue);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
- bool ReduceMBB(MachineBasicBlock &MBB);
+ bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
bool OptimizeSize;
bool MinimizeSize;
@@ -620,7 +621,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -668,7 +669,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -848,7 +849,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -971,7 +972,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -1012,11 +1013,15 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
}
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop,
+ bool SkipPrologueEpilogue) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
if (OPI == ReduceOpcodeMap.end())
return false;
+ if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
+ MI->getFlag(MachineInstr::FrameDestroy)))
+ return false;
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Don't attempt normal reductions on "special" cases for now.
@@ -1036,7 +1041,8 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
return false;
}
-bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
+ bool SkipPrologueEpilogue) {
bool Modified = false;
// Yes, CPSR could be livein.
@@ -1080,7 +1086,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Does NextMII belong to the same bundle as MI?
bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
- if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
Modified = true;
MachineBasicBlock::instr_iterator I = std::prev(NextMII);
MI = &*I;
@@ -1130,7 +1136,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
if (PredicateFtor && !PredicateFtor(MF.getFunction()))
return false;
- STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<ARMSubtarget>();
if (STI->isThumb1Only() || STI->prefers32BitThumb())
return false;
@@ -1147,8 +1153,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
// predecessors.
ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
+ bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF.getFunction().needsUnwindTableEntry();
for (MachineBasicBlock *MBB : RPOT)
- Modified |= ReduceMBB(*MBB);
+ Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
return Modified;
}
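
The guard added to ReduceMI can be modelled in isolation: when Windows CFI is required, instructions flagged as frame setup or frame destroy must keep their 32-bit encodings, since the unwind info was emitted against them. A sketch under that assumption:

    #include <cassert>

    struct MIModel {
      bool FrameSetup;
      bool FrameDestroy;
    };

    static bool canReduce(const MIModel &MI, bool NeedsWinCFI) {
      if (NeedsWinCFI && (MI.FrameSetup || MI.FrameDestroy))
        return false; // keep the 32-bit encoding the unwind info describes
      return true;
    }

    int main() {
      assert(!canReduce({true, false}, /*NeedsWinCFI=*/true));
      assert(canReduce({true, false}, /*NeedsWinCFI=*/false));
      assert(canReduce({false, false}, /*NeedsWinCFI=*/true));
      return 0;
    }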
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5d2bc4ebe191..2a3fa3b31512 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -37,7 +37,7 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-ThumbRegisterInfo::ThumbRegisterInfo() {}
+ThumbRegisterInfo::ThumbRegisterInfo() = default;
const TargetRegisterClass *
ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
@@ -338,7 +338,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
static void removeOperands(MachineInstr &MI, unsigned i) {
unsigned Op = i;
for (unsigned e = MI.getNumOperands(); i != e; ++i)
- MI.RemoveOperand(Op);
+ MI.removeOperand(Op);
}
/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
@@ -361,6 +361,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
"This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
@@ -396,7 +397,18 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with the frame register (e.g., sp).
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ Register DestReg = FrameReg;
+
+ // In case FrameReg is a high register, move it to a low reg to ensure it
+ // can be used as an operand.
+ if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) {
+ DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
// If we're using a register where sp was stored, convert the instruction
@@ -517,7 +529,16 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+ // If FrameReg is a high register, add the reg values in a separate
+ // instruction as the load won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg)
+ .addReg(TmpReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else {
emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
@@ -526,11 +547,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 loads can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
@@ -541,18 +565,30 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+ // If FrameReg is a high register, add the reg values in a separate
+        // instruction as the store won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg)
+ .addReg(VReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 stores can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else {
llvm_unreachable("Unexpected opcode!");
}
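
Both fix-ups in this file follow one pattern: Thumb1 immediate-form loads and stores cannot encode a high base register, so the frame register is first routed through a low register (tMOVr) or folded into the materialized offset (tADDhirr). An illustrative sketch of the first strategy, with example registers:

    #include <cstdio>

    int main() {
      bool FrameRegIsHigh = true;
      if (FrameRegIsHigh) {
        std::printf("mov r4, r11\n");      // tMOVr: low copy of the frame reg
        std::printf("ldr r0, [r4, #8]\n"); // immediate form is now encodable
      } else {
        std::printf("ldr r0, [r7, #8]\n"); // low frame reg: use it directly
      }
      return 0;
    }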