author     Dimitry Andric <dim@FreeBSD.org>    2022-07-03 14:10:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-07-03 14:10:23 +0000
commit     145449b1e420787bb99721a429341fa6be3adfb6
tree       1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/ARM
parent     ecbca9f5fb7d7613d2b94982c4825eb0d33d6842
Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877
(branch: vendor/llvm-project/llvmorg-15-init-15358-g53dc0f107877)
Diffstat (limited to 'llvm/lib/Target/ARM')
64 files changed, 4204 insertions, 2138 deletions
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index d0efecad63bc..65da95b0fc8d 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -361,9 +361,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
     MI = Front.pop_back_val();
 
     // If we have already explored this MachineInstr, ignore it.
-    if (Reached.find(MI) != Reached.end())
+    if (!Reached.insert(MI).second)
       continue;
-    Reached.insert(MI);
     if (MI->isPHI()) {
       for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
         Register Reg = MI->getOperand(I).getReg();

diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 979371bf7cf6..9990078cfdbb 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@ Pass *createMVEGatherScatterLoweringPass();
 FunctionPass *createARMSLSHardeningPass();
 FunctionPass *createARMIndirectThunks();
 Pass *createMVELaneInterleavingPass();
+FunctionPass *createARMFixCortexA57AES1742098Pass();
 
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
@@ -77,6 +78,7 @@ void initializeMVETailPredicationPass(PassRegistry &);
 void initializeMVEGatherScatterLoweringPass(PassRegistry &);
 void initializeARMSLSHardeningPass(PassRegistry &);
 void initializeMVELaneInterleavingPass(PassRegistry &);
+void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
 
 } // end namespace llvm

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 27edf69b4abf..48559a89a30a 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -19,9 +19,11 @@ include "llvm/Target/Target.td"
 // ARM Subtarget state.
 //
 
-def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode",
+// True if compiling for Thumb, false for ARM.
+def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb",
                                  "true", "Thumb mode">;
 
+// True if we're using software floating point features.
 def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
                                      "true", "Use software floating "
                                      "point features.">;
@@ -48,14 +50,18 @@ def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64",
                                        "true", "Enable 64-bit FP registers",
                                        [FeatureFPRegs]>;
 
+// True if the floating point unit supports double precision.
 def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
                                    "Floating point unit supports "
                                    "double precision",
                                    [FeatureFPRegs64]>;
 
+// True if subtarget has the full 32 double precision FP registers for VFPv3.
 def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
                                   "Extend FP to 32 double registers">;
 
+/// Versions of the VFP flags restricted to single precision, or to
+/// 16 d-registers, or both.
 multiclass VFPver<string name, string query, string description,
                   list<SubtargetFeature> prev,
                   list<SubtargetFeature> otherimplies,
@@ -100,6 +106,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
                                    "Enable NEON instructions",
                                    [FeatureVFP3]>;
 
+// True if subtarget supports half-precision FP conversions.
 def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
                                    "Enable half-precision "
                                    "floating point">;
@@ -110,169 +117,211 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
 
 defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
                             [FeatureVFP4], []>;
 
+// True if subtarget supports half-precision FP operations.
 def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                        "Enable full half-precision "
                                        "floating point",
                                        [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
 
+// True if subtarget supports half-precision FP fml operations.
 def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                       "Enable full half-precision "
                                       "floating point fml instructions",
                                       [FeatureFullFP16]>;
 
+// True if subtarget supports [su]div in Thumb mode.
 def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
-                                         "HasHardwareDivideInThumb", "true",
+                                         "HasDivideInThumbMode", "true",
                                          "Enable divide instructions in Thumb">;
 
+// True if subtarget supports [su]div in ARM mode.
 def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
-                                       "HasHardwareDivideInARM", "true",
+                                       "HasDivideInARMMode", "true",
                                        "Enable divide instructions in ARM mode">;
 
 // Atomic Support
+
+// True if the subtarget supports DMB / DSB data barrier instructions.
 def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
                                  "Has data barrier (dmb/dsb) instructions">;
 
+// True if the subtarget supports CLREX instructions.
 def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
                                       "Has v7 clrex instruction">;
 
+// True if the subtarget supports DFB data barrier instruction.
 def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true",
                                   "Has full data barrier (dfb) instruction">;
 
+// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions.
 def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
                                              "HasAcquireRelease", "true",
                                              "Has v8 acquire/release (lda/ldaex "
                                              " etc) instructions">;
 
-def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+// True if floating point compare + branch is slow.
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true",
                                          "FP compare + branch is slow">;
 
+// True if the processor supports the Performance Monitor Extensions. These
+// include a generic cycle-counter as well as more fine-grained (often
+// implementation-specific) events.
 def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
                                       "Enable support for Performance "
                                       "Monitor extensions">;
 
 // TrustZone Security Extensions
+
+// True if processor supports TrustZone security extensions.
 def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
                                         "Enable support for TrustZone "
                                         "security extensions">;
 
+// True if processor supports ARMv8-M Security Extensions.
 def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
                                        "Enable support for ARMv8-M "
                                        "Security Extensions">;
 
+// True if processor supports SHA1 and SHA256.
 def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true",
                                    "Enable SHA1 and SHA256 support",
                                    [FeatureNEON]>;
 
 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                   "Enable AES support",
                                   [FeatureNEON]>;
 
+// True if processor supports Cryptography extensions.
 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
                                      "Enable support for "
                                      "Cryptography extensions",
                                      [FeatureNEON, FeatureSHA2, FeatureAES]>;
 
+// True if processor supports CRC instructions.
 def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
                                   "Enable support for CRC instructions">;
 
+// True if the ARMv8.2A dot product instructions are supported.
 def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true",
                                       "Enable support for dot product instructions",
                                       [FeatureNEON]>;
 
-// Not to be confused with FeatureHasRetAddrStack (return address stack)
+// True if the processor supports RAS extensions.
+// Not to be confused with FeatureHasRetAddrStack (return address stack).
 def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
                                   "Enable Reliability, Availability "
                                   "and Serviceability extensions">;
 
-// Fast computation of non-negative address offsets
+// Fast computation of non-negative address offsets.
+// True if processor does positive address offset computation faster.
 def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
                                    "Enable fast computation of "
                                    "positive address offsets">;
 
-// Fast execution of AES crypto operations
+// Fast execution of AES crypto operations.
+// True if processor executes back to back AES instruction pairs faster.
 def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
                                       "CPU fuses AES crypto operations">;
 
-// Fast execution of bottom and top halves of literal generation
+// Fast execution of bottom and top halves of literal generation.
+// True if processor executes back to back bottom and top halves of literal generation faster.
 def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
                                            "CPU fuses literal generation operations">;
 
-// The way of reading thread pointer
-def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
+// The way of reading thread pointer.
+// True if read thread pointer from coprocessor register.
+def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true",
                                      "Reading thread pointer from register">;
 
 // Cyclone can zero VFP registers in 0 cycles.
+// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+// particularly effective at zeroing a VFP register.
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
-// Whether it is profitable to unpredicate certain instructions during if-conversion
+// Whether it is profitable to unpredicate certain instructions during if-conversion.
+// True if if conversion may decide to leave some instructions unpredicated.
 def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
                                               "IsProfitableToUnpredicate", "true",
                                               "Is profitable to unpredicate">;
 
 // Some targets (e.g. Swift) have microcoded VGETLNi32.
+// True if VMOV will be favored over VGETLNi32.
 def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
                                             "HasSlowVGETLNi32", "true",
                                             "Has slow VGETLNi32 - prefer VMOV">;
 
 // Some targets (e.g. Swift) have microcoded VDUP32.
+// True if VMOV will be favored over VDUP.
 def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32",
                                          "true",
                                          "Has slow VDUP32 - prefer VMOV">;
 
 // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
 // for scalar FP, as this allows more effective execution domain optimization.
+// True if VMOVSR will be favored over VMOVDRR.
 def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
                                            "true", "Prefer VMOVSR">;
 
 // Swift has ISHST barriers compatible with Atomic Release semantics but weaker
-// than ISH
-def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
+// than ISH.
+// True if ISHST barriers will be used for Release semantics.
+def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers",
                                                "true", "Prefer ISHST barriers">;
 
 // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+// True if the AGU and NEON/FPU units are multiplexed.
 def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits",
                                          "true",
                                          "Has muxed AGU and NEON/FPU">;
 
 // Whether VLDM/VSTM starting with odd register number need more microops
-// than single VLDRS
-def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+// than single VLDRS.
+// True if a VLDM/VSTM starting with an odd register number is considered to
+// take more microops than single VLDRS/VSTRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister",
                                               "true", "VLDM/VSTM starting "
                                               "with an odd register is slow">;
 
 // Some targets have a renaming dependency when loading into D subregisters.
+// True if loading into a D subregister will be penalized.
 def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
-                                              "SlowLoadDSubregister", "true",
+                                              "HasSlowLoadDSubregister", "true",
                                               "Loading into D subregs is slow">;
 
+// True if use a wider stride when allocating VFP registers.
 def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
                                                "UseWideStrideVFP", "true",
                                                "Use a wide stride when allocating VFP registers">;
 
 // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+// True if VMOVS will never be widened to VMOVD.
 def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
                                              "DontWidenVMOVS", "true",
                                              "Don't widen VMOVS to VMOVD">;
 
 // Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
 // VFP register widths.
+// True if splat a register between VFP and NEON instructions.
 def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
-                                             "SplatVFPToNeon", "true",
+                                             "UseSplatVFPToNeon", "true",
                                              "Splat register from VFP to NEON",
                                              [FeatureDontWidenVMOVS]>;
 
 // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
+// True if run the MLx expansion pass.
 def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
                                         "ExpandMLx", "true",
                                         "Expand VFP/NEON MLA/MLS instructions">;
 
 // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
+// True if VFP/NEON VMLA/VMLS have special RAW hazards.
 def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
                                              "true", "Has VMLx hazards">;
 
 // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
 // VFP to NEON, as an execution domain optimization.
+// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
 def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
                                             "UseNEONForFPMovs", "true",
                                             "Convert VMOVSR, VMOVRS, "
@@ -281,18 +330,21 @@ def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
 
 // Some processors benefit from using NEON instructions for scalar
 // single-precision FP operations. This affects instruction selection and should
 // only be enabled if the handling of denormals is not important.
+// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used.
 def FeatureNEONForFP : SubtargetFeature<"neonfp",
-                                        "UseNEONForSinglePrecisionFP",
+                                        "HasNEONForFP",
                                         "true",
                                         "Use NEON for single precision FP">;
 
 // On some processors, VLDn instructions that access unaligned data take one
 // extra cycle. Take that into account when computing operand latencies.
-def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+// True if VLDn instructions take an extra cycle for unaligned accesses.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment",
                                              "true",
                                              "Check for VLDn unaligned access">;
 
 // Some processors have a nonpipelined VFP coprocessor.
+// True if VFP instructions are not pipelined.
 def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
                                               "NonpipelinedVFP", "true",
                                               "VFP instructions are not pipelined">;
@@ -300,20 +352,27 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
+// If the VFP2 / NEON instructions are available, indicates
+// whether the FP VML[AS] instructions are slow (if so, don't use them).
 def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
                                             "Disable VFP / NEON MAC instructions">;
 
-// VFPv4 added VFMA instructions that can similar be fast or slow.
+// VFPv4 added VFMA instructions that can similarly be fast or slow.
+// If the VFP4 / NEON instructions are available, indicates
+// whether the FP VFM[AS] instructions are slow (if so, don't use them).
 def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true",
                                             "Disable VFP / NEON FMA instructions">;
 
 // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+/// True if NEON has special multiplier accumulator
+/// forwarding to allow mul + mla being issued back to back.
 def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
                                              "HasVMLxForwarding", "true",
                                              "Has multiplier accumulator forwarding">;
 
 // Disable 32-bit to 16-bit narrowing for experimentation.
-def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
 def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
@@ -332,17 +391,22 @@ def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFac
 /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
 /// mapped to a separate physical register. Avoid partial CPSR update for these
 /// processors.
+/// True if codegen would avoid using instructions
+/// that partially update CPSR and add false dependency on the previous
+/// CPSR setting instruction.
 def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                                "Avoid CPSR partial update for OOO execution">;
 
 /// Disable +1 predication cost for instructions updating CPSR.
 /// Enabled for Cortex-A57.
+/// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57.
 def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
                                                   "CheapPredicableCPSRDef", "true",
                                                   "Disable +1 predication cost for instructions updating CPSR">;
 
+// True if codegen should avoid using flag setting movs with shifter operand (i.e. asr, lsl, lsr).
 def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
                                             "AvoidMOVsShifterOperand", "true",
                                             "Avoid movs instructions with "
@@ -357,16 +421,20 @@ def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
 
 // Some processors have no branch predictor, which changes the expected cost of
 // taking a branch which affects the choice of whether to use predicated
 // instructions.
+// True if the subtarget has a branch predictor. Having
+// a branch predictor or not changes the expected cost of taking a branch
+// which affects the choice of whether to use predicated instructions.
 def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
                                                    "HasBranchPredictor", "false",
                                                    "Has no branch predictor">;
 
 /// DSP extension.
+/// True if the subtarget supports the DSP (saturating arith and such) instructions.
 def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
                                   "Supports DSP instructions in "
                                   "ARM and/or Thumb2">;
 
-// Multiprocessing extension.
+// True if the subtarget supports Multiprocessing extension (ARMv7 only).
 def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
                                  "Supports Multiprocessing extension">;
 
@@ -378,31 +446,42 @@ def FeatureVirtualization : SubtargetFeature<"virtualization",
 
 // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
 // See ARMInstrInfo.td for details.
+// True if NaCl TRAP instruction is generated instead of the regular TRAP.
 def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
                                        "NaCl trap">;
 
+// True if the subtarget disallows unaligned memory
+// accesses for some types. For details, see
+// ARMTargetLowering::allowsMisalignedMemoryAccesses().
 def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign",
                                           "true", "Disallow all unaligned memory "
                                           "access">;
 
+// Generate calls via indirect call instructions.
 def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
                                         "Generate calls via indirect call "
                                         "instructions">;
 
+// Generate code that does not contain data access to code sections.
 def FeatureExecuteOnly : SubtargetFeature<"execute-only",
                                           "GenExecuteOnly", "true",
                                           "Enable the generation of "
                                           "execute only code.">;
 
+// True if R9 is not available as a general purpose register.
 def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
                                         "Reserve R9, making it unavailable"
                                         " as GPR">;
 
+// True if MOVT / MOVW pairs are not used for materialization of
+// 32-bit imms (including global addresses).
 def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
                                      "Don't use movt/movw pairs for "
                                      "32-bit imms">;
 
+/// Implicitly convert an instruction to a different one if its immediates
+/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
 def FeatureNoNegativeImmediates
     : SubtargetFeature<"no-neg-immediates",
                        "NegativeImmediates", "false",
@@ -415,28 +494,39 @@ def FeatureNoNegativeImmediates
 def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true",
                                         "Use the MachineScheduler">;
 
+// Use the MachinePipeliner for instruction scheduling for the subtarget.
+def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true",
+                                            "Use the MachinePipeliner">;
+
+// False if scheduling should happen again after register allocation.
 def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
     "DisablePostRAScheduler", "true",
     "Don't schedule again after register allocation">;
 
 // Armv8.5-A extensions
 
+// Has speculation barrier.
 def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
   "Enable v8.5a Speculation Barrier" >;
 
 // Armv8.6-A extensions
+
+// True if subtarget supports BFloat16 floating point operations.
 def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
   "Enable support for BFloat16 instructions",  [FeatureNEON]>;
 
+// True if subtarget supports 8-bit integer matrix multiply.
 def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
     "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;
 
 // Armv8.1-M extensions
 
+// True if the processor supports the Low Overhead Branch extension.
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", "Enable Low Overhead Branch " "extensions">; +// Mitigate against the cve-2021-35465 security vulnurability. def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", "FixCMSE_CVE_2021_35465", "true", "Mitigate against the cve-2021-35465 " @@ -446,11 +536,26 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true", "Enable Pointer Authentication and Branch " "Target Identification">; +/// Don't place a BTI instruction after return-twice constructs (setjmp). def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", "NoBTIAtReturnTwice", "true", "Don't place a BTI instruction " "after a return-twice">; +def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098", + "FixCortexA57AES1742098", "true", + "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; + +def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", + "CreateAAPCSFrameChain", "true", + "Create an AAPCS compliant frame chain">; + +def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", + "CreateAAPCSFrameChainLeaf", "true", + "Create an AAPCS compliant frame chain " + "for leaf functions", + [FeatureAAPCSFrameChain]>; + //===----------------------------------------------------------------------===// // ARM architecture class // @@ -467,16 +572,18 @@ def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; - +// True if Thumb2 instructions are supported. def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; +// True if subtarget does not support ARM mode execution. def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; //===----------------------------------------------------------------------===// // ARM ISAa. // +// Specify whether target support specific ARM ISA variants. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -599,13 +706,16 @@ foreach i = {0-7} in // Control codegen mitigation against Straight Line Speculation vulnerability. //===----------------------------------------------------------------------===// +/// Harden against Straight Line Speculation for Returns and Indirect Branches. def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", "HardenSlsRetBr", "true", "Harden against straight line speculation across RETurn and BranchRegister " "instructions">; +/// Harden against Straight Line Speculation for indirect calls. def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", "HardenSlsBlr", "true", "Harden against straight line speculation across indirect calls">; +/// Generate thunk code for SLS mitigation in the normal text section. 
 def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat",
   "HardenSlsNoComdat", "true",
   "Generate thunk code for SLS mitigation in the normal text section">;
 
@@ -1303,6 +1413,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model,      [ARMv7em,
 def : ProcessorModel<"cortex-m7", CortexM7Model,      [ARMv7em,
                                                        ProcM7,
                                                        FeatureFPARMv8_D16,
+                                                       FeatureUseMIPipeliner,
                                                        FeatureUseMISched]>;
 
 def : ProcNoItin<"cortex-m23",                        [ARMv8mBaseline,
@@ -1370,13 +1481,15 @@ def : ProcessorModel<"cortex-a57", CortexA57Model,    [ARMv8a, ProcA57,
                                                        FeatureCRC,
                                                        FeatureFPAO,
                                                        FeatureAvoidPartialCPSR,
-                                                       FeatureCheapPredicableCPSR]>;
+                                                       FeatureCheapPredicableCPSR,
+                                                       FeatureFixCortexA57AES1742098]>;
 
 def : ProcessorModel<"cortex-a72", CortexA57Model,    [ARMv8a, ProcA72,
                                                        FeatureHWDivThumb,
                                                        FeatureHWDivARM,
                                                        FeatureCrypto,
-                                                       FeatureCRC]>;
+                                                       FeatureCRC,
+                                                       FeatureFixCortexA57AES1742098]>;
 
 def : ProcNoItin<"cortex-a73",                        [ARMv8a, ProcA73,
                                                        FeatureHWDivThumb,

diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index fa09b2567aa9..4aa28bc5d28d 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -161,10 +161,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
           : COFF::IMAGE_SYM_CLASS_EXTERNAL;
       int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
 
-      OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
-      OutStreamer->EmitCOFFSymbolStorageClass(Scl);
-      OutStreamer->EmitCOFFSymbolType(Type);
-      OutStreamer->EndCOFFSymbolDef();
+      OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
+      OutStreamer->emitCOFFSymbolStorageClass(Scl);
+      OutStreamer->emitCOFFSymbolType(Type);
+      OutStreamer->endCOFFSymbolDef();
     }
 
   // Emit the rest of the function body.
@@ -535,27 +535,27 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
 
     if (!Stubs.empty()) {
       // Switch with ".non_lazy_symbol_pointer" directive.
-      OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+      OutStreamer->switchSection(TLOFMacho.getNonLazySymbolPointerSection());
       emitAlignment(Align(4));
 
       for (auto &Stub : Stubs)
         emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
 
       Stubs.clear();
-      OutStreamer->AddBlankLine();
+      OutStreamer->addBlankLine();
     }
 
     Stubs = MMIMacho.GetThreadLocalGVStubList();
     if (!Stubs.empty()) {
       // Switch with ".non_lazy_symbol_pointer" directive.
-      OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
+      OutStreamer->switchSection(TLOFMacho.getThreadLocalPointerSection());
       emitAlignment(Align(4));
 
       for (auto &Stub : Stubs)
         emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
 
       Stubs.clear();
-      OutStreamer->AddBlankLine();
+      OutStreamer->addBlankLine();
     }
 
     // Funny Darwin hack: This flag tells the linker that no global symbols
@@ -740,55 +740,53 @@ void ARMAsmPrinter::emitAttributes() {
     ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
                       ARMBuildAttrs::FP16FormatIEEE);
 
-  if (MMI) {
-    if (const Module *SourceModule = MMI->getModule()) {
-      // ABI_PCS_wchar_t to indicate wchar_t width
-      // FIXME: There is no way to emit value 0 (wchar_t prohibited).
-      if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
-              SourceModule->getModuleFlag("wchar_size"))) {
-        int WCharWidth = WCharWidthValue->getZExtValue();
-        assert((WCharWidth == 2 || WCharWidth == 4) &&
-               "wchar_t width must be 2 or 4 bytes");
-        ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
-      }
+  if (const Module *SourceModule = MMI->getModule()) {
+    // ABI_PCS_wchar_t to indicate wchar_t width
+    // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+    if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
+            SourceModule->getModuleFlag("wchar_size"))) {
+      int WCharWidth = WCharWidthValue->getZExtValue();
+      assert((WCharWidth == 2 || WCharWidth == 4) &&
+             "wchar_t width must be 2 or 4 bytes");
+      ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+    }
 
-      // ABI_enum_size to indicate enum width
-      // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
-      //        (all enums contain a value needing 32 bits to encode).
-      if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
-              SourceModule->getModuleFlag("min_enum_size"))) {
-        int EnumWidth = EnumWidthValue->getZExtValue();
-        assert((EnumWidth == 1 || EnumWidth == 4) &&
-               "Minimum enum width must be 1 or 4 bytes");
-        int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
-        ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
-      }
+    // ABI_enum_size to indicate enum width
+    // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+    //        (all enums contain a value needing 32 bits to encode).
+    if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
+            SourceModule->getModuleFlag("min_enum_size"))) {
+      int EnumWidth = EnumWidthValue->getZExtValue();
+      assert((EnumWidth == 1 || EnumWidth == 4) &&
+             "Minimum enum width must be 1 or 4 bytes");
+      int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+      ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+    }
 
-      auto *PACValue = mdconst::extract_or_null<ConstantInt>(
-          SourceModule->getModuleFlag("sign-return-address"));
-      if (PACValue && PACValue->getZExtValue() == 1) {
-        // If "+pacbti" is used as an architecture extension,
-        // Tag_PAC_extension is emitted in
-        // ARMTargetStreamer::emitTargetAttributes().
-        if (!STI.hasPACBTI()) {
-          ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
-                            ARMBuildAttrs::AllowPACInNOPSpace);
-        }
-        ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+    auto *PACValue = mdconst::extract_or_null<ConstantInt>(
+        SourceModule->getModuleFlag("sign-return-address"));
+    if (PACValue && PACValue->getZExtValue() == 1) {
+      // If "+pacbti" is used as an architecture extension,
+      // Tag_PAC_extension is emitted in
+      // ARMTargetStreamer::emitTargetAttributes().
+      if (!STI.hasPACBTI()) {
+        ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
+                          ARMBuildAttrs::AllowPACInNOPSpace);
       }
+      ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+    }
 
-      auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
-          SourceModule->getModuleFlag("branch-target-enforcement"));
-      if (BTIValue && BTIValue->getZExtValue() == 1) {
-        // If "+pacbti" is used as an architecture extension,
-        // Tag_BTI_extension is emitted in
-        // ARMTargetStreamer::emitTargetAttributes().
-        if (!STI.hasPACBTI()) {
-          ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
-                            ARMBuildAttrs::AllowBTIInNOPSpace);
-        }
-        ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
+    auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
+        SourceModule->getModuleFlag("branch-target-enforcement"));
+    if (BTIValue && BTIValue->getZExtValue() == 1) {
+      // If "+pacbti" is used as an architecture extension,
+      // Tag_BTI_extension is emitted in
+      // ARMTargetStreamer::emitTargetAttributes().
+      if (!STI.hasPACBTI()) {
+        ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
+                          ARMBuildAttrs::AllowBTIInNOPSpace);
      }
+      ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
    }
  }
 
@@ -2276,6 +2274,47 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInstSB);
     return;
   }
+
+  case ARM::SEH_StackAlloc:
+    ATS.emitARMWinCFIAllocStack(MI->getOperand(0).getImm(),
+                                MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveRegs:
+  case ARM::SEH_SaveRegs_Ret:
+    ATS.emitARMWinCFISaveRegMask(MI->getOperand(0).getImm(),
+                                 MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveSP:
+    ATS.emitARMWinCFISaveSP(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_SaveFRegs:
+    ATS.emitARMWinCFISaveFRegs(MI->getOperand(0).getImm(),
+                               MI->getOperand(1).getImm());
+    return;
+
+  case ARM::SEH_SaveLR:
+    ATS.emitARMWinCFISaveLR(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_Nop:
+  case ARM::SEH_Nop_Ret:
+    ATS.emitARMWinCFINop(MI->getOperand(0).getImm());
+    return;
+
+  case ARM::SEH_PrologEnd:
+    ATS.emitARMWinCFIPrologEnd(/*Fragment=*/false);
+    return;
+
+  case ARM::SEH_EpilogStart:
+    ATS.emitARMWinCFIEpilogStart(ARMCC::AL);
+    return;
+
+  case ARM::SEH_EpilogEnd:
+    ATS.emitARMWinCFIEpilogEnd();
+    return;
   }
 
   MCInst TmpInst;

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5b0bae4d9274..80ba7b5f0d2e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
 }
 
 // Branch analysis.
+// Cond vector output format:
+//   0 elements indicates an unconditional branch
+//   2 elements indicates a conditional branch; the elements are
+//     the condition to check and the CPSR.
+//   3 elements indicates a hardware loop end; the elements
+//     are the opcode, the operand value to test, and a dummy
+//     operand used to pad out to 3 operands.
 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
                                      MachineBasicBlock *&FBB,
@@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     } else if (I->isReturn()) {
       // Returns can't be analyzed, but we should run cleanup.
       CantAnalyze = true;
+    } else if (I->getOpcode() == ARM::t2LoopEnd &&
+               MBB.getParent()
+                   ->getSubtarget<ARMSubtarget>()
+                   .enableMachinePipeliner()) {
+      if (!Cond.empty())
+        return true;
+      FBB = TBB;
+      TBB = I->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+      Cond.push_back(I->getOperand(0));
+      Cond.push_back(MachineOperand::CreateImm(0));
     } else {
       // We encountered other unrecognized terminator. Bail out immediately.
       return true;
@@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 0;
 
   if (!isUncondBranchOpcode(I->getOpcode()) &&
-      !isCondBranchOpcode(I->getOpcode()))
+      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
     return 0;
 
   // Remove the branch.
@@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
   if (I == MBB.begin()) return 1;
   --I;
-  if (!isCondBranchOpcode(I->getOpcode()))
+  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
     return 1;
 
   // Remove the branch.
@@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
   // Shouldn't be a fall through.
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 2 || Cond.size() == 0) &&
-         "ARM branch conditions have two components!");
+  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
+         "ARM branch conditions have two or three components!");
 
   // For conditional branches, we use addOperand to preserve CPSR flags.
 
@@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
       BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
     else
       BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
-    } else
+    } else if (Cond.size() == 2) {
       BuildMI(&MBB, DL, get(BccOpc))
           .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
+    } else
+      BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
    return 1;
  }
 
  // Two-way conditional branch.
-  BuildMI(&MBB, DL, get(BccOpc))
-      .addMBB(TBB)
-      .addImm(Cond[0].getImm())
-      .add(Cond[1]);
+  if (Cond.size() == 2)
+    BuildMI(&MBB, DL, get(BccOpc))
+        .addMBB(TBB)
+        .addImm(Cond[0].getImm())
+        .add(Cond[1]);
+  else if (Cond.size() == 3)
+    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
@@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
 bool ARMBaseInstrInfo::
 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
-  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
-  return false;
+  if (Cond.size() == 2) {
+    ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+    Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+    return false;
+  }
+  return true;
 }
 
 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
@@ -556,7 +582,7 @@ std::string ARMBaseInstrInfo::createMIROperandComment(
     return GenericComment;
 
   // If not, check if we have an immediate operand.
-  if (Op.getType() != MachineOperand::MO_Immediate)
+  if (!Op.isImm())
     return std::string();
 
   // And print its corresponding condition code if the immediate is a
@@ -1703,7 +1729,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     // or some other super-register.
     int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
     if (ImpDefIdx != -1)
-      MI.RemoveOperand(ImpDefIdx);
+      MI.removeOperand(ImpDefIdx);
 
     // Change the opcode and operands.
     MI.setDesc(get(ARM::VMOVD));
@@ -2045,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
     return true;
 
+  if (isSEHInstruction(MI))
+    return true;
+
   // Treat the start of the IT block as a scheduling boundary, but schedule
   // t2IT along with all instructions following it.
   // FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2598,7 +2627,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
   // ahead: strip all existing registers off and add them back again
   // in the right order.
   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
-    MI->RemoveOperand(i);
+    MI->removeOperand(i);
 
   // Add the complete list back in.
   MachineInstrBuilder MIB(MF, &*MI);
@@ -2626,7 +2655,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       // Turn it into a move.
       MI.setDesc(TII.get(ARM::MOVr));
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
-      MI.RemoveOperand(FrameRegIdx+1);
+      MI.removeOperand(FrameRegIdx+1);
       Offset = 0;
       return true;
     } else if (Offset < 0) {
@@ -5103,7 +5132,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
     SrcReg = MI.getOperand(1).getReg();
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
     MI.setDesc(get(ARM::VORRd));
@@ -5122,7 +5151,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
     SrcReg = MI.getOperand(1).getReg();
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
@@ -5155,7 +5184,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
       break;
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
     // Again DDst may be undefined at the beginning of this instruction.
@@ -5190,7 +5219,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
       break;
 
     for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
-      MI.RemoveOperand(i - 1);
+      MI.removeOperand(i - 1);
 
     if (DSrc == DDst) {
       // Destination can be:
@@ -5766,26 +5795,25 @@ struct OutlinerCosts {
         SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
 };
 
-unsigned
-ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
-  assert(C.LRUWasSet && "LRU wasn't set?");
+Register
+ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
   MachineFunction *MF = C.getMF();
-  const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
-      MF->getSubtarget().getRegisterInfo());
+  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+  const ARMBaseRegisterInfo *ARI =
+      static_cast<const ARMBaseRegisterInfo *>(&TRI);
 
   BitVector regsReserved = ARI->getReservedRegs(*MF);
   // Check if there is an available register across the sequence that we can
   // use.
-  for (unsigned Reg : ARM::rGPRRegClass) {
+  for (Register Reg : ARM::rGPRRegClass) {
     if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
         Reg != ARM::LR &&  // LR is not reserved, but don't use it.
         Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
-        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
+        C.isAvailableInsideSeq(Reg, TRI))
       return Reg;
   }
-
-  // No suitable register. Return 0.
-  return 0u;
+  return Register();
 }
 
 // Compute liveness of LR at the point after the interval [I, E), which
@@ -5833,9 +5861,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
 
   // Compute liveness information for each candidate, and set FlagsSetInAll.
   const TargetRegisterInfo &TRI = getRegisterInfo();
-  std::for_each(
-      RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
-      [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+  for (outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
 
   // According to the ARM Procedure Call Standard, the following are
   // undefined on entry/exit from a function call:
@@ -5854,9 +5881,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
     // to compute liveness here.
     if (C.Flags & UnsafeRegsDead)
       return false;
-    C.initLRU(TRI);
-    LiveRegUnits LRU = C.LRU;
-    return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
   };
 
   // Are there any candidates where those registers are live?
@@ -5969,7 +5994,6 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
   std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
 
   for (outliner::Candidate &C : RepeatedSequenceLocs) {
-    C.initLRU(TRI);
     // LR liveness is overestimated in return blocks, unless they end with a
     // tail call.
     const auto Last = C.getMBB()->rbegin();
     const bool LRIsAvailable =
         C.getMBB()->isReturnBlock() && !Last->isCall()
             ? isLRAvailable(TRI, Last,
                             (MachineBasicBlock::reverse_iterator)C.front())
-            : C.LRU.available(ARM::LR);
+            : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
     if (LRIsAvailable) {
       FrameID = MachineOutlinerNoLRSave;
       NumBytesNoStackCalls += Costs.CallNoLRSave;
@@ -5996,7 +6020,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
 
     // Is SP used in the sequence at all? If not, we don't have to modify
     // the stack, so we are guaranteed to get the same frame.
-    else if (C.UsedInSequence.available(ARM::SP)) {
+    else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
       NumBytesNoStackCalls += Costs.CallDefault;
       C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
       CandidatesWithoutStackFixups.push_back(C);
@@ -6189,8 +6213,8 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
 
   LiveRegUnits LRU(getRegisterInfo());
 
-  std::for_each(MBB.rbegin(), MBB.rend(),
-                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+  for (MachineInstr &MI : llvm::reverse(MBB))
+    LRU.accumulate(MI);
 
   // Check if each of the unsafe registers are available...
   bool R12AvailableInBlock = LRU.available(ARM::R12);
@@ -6635,7 +6659,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
 MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
-    MachineFunction &MF, const outliner::Candidate &C) const {
+    MachineFunction &MF, outliner::Candidate &C) const {
   MachineInstrBuilder MIB;
   MachineBasicBlock::iterator CallPt;
   unsigned Opc;
@@ -6726,3 +6750,122 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
                              : ARM::BLX_pred;
 }
 
+namespace {
+class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+  MachineInstr *EndLoop, *LoopCount;
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+
+  // Meanings of the various stuff with loop types:
+  // t2Bcc:
+  //   EndLoop = branch at end of original BB that will become a kernel
+  //   LoopCount = CC setter live into branch
+  // t2LoopEnd:
+  //   EndLoop = branch at end of original BB
+  //   LoopCount = t2LoopDec
+public:
+  ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
+      : EndLoop(EndLoop), LoopCount(LoopCount),
+        MF(EndLoop->getParent()->getParent()),
+        TII(MF->getSubtarget().getInstrInfo()) {}
+
+  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+    // Only ignore the terminator.
+    return MI == EndLoop || MI == LoopCount;
+  }
+
+  Optional<bool> createTripCountGreaterCondition(
+      int TC, MachineBasicBlock &MBB,
+      SmallVectorImpl<MachineOperand> &Cond) override {
+
+    if (isCondBranchOpcode(EndLoop->getOpcode())) {
+      Cond.push_back(EndLoop->getOperand(1));
+      Cond.push_back(EndLoop->getOperand(2));
+      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
+        TII->reverseBranchCondition(Cond);
+      }
+      return {};
+    } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
+      // General case just lets the unrolled t2LoopDec do the subtraction and
+      // therefore just needs to check if zero has been reached.
+      MachineInstr *LoopDec = nullptr;
+      for (auto &I : MBB.instrs())
+        if (I.getOpcode() == ARM::t2LoopDec)
+          LoopDec = &I;
+      assert(LoopDec && "Unable to find copied LoopDec");
+      // Check if we're done with the loop.
+      BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
+          .addReg(LoopDec->getOperand(0).getReg())
+          .addImm(0)
+          .addImm(ARMCC::AL)
+          .addReg(ARM::NoRegister);
+      Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
+      Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+      return {};
+    } else
+      llvm_unreachable("Unknown EndLoop");
+  }
+
+  void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+  void adjustTripCount(int TripCountAdjust) override {}
+
+  void disposed() override {}
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+  if (Preheader == LoopBB)
+    Preheader = *std::next(LoopBB->pred_begin());
+
+  if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
+    // If the branch is a Bcc, then the CPSR should be set somewhere within the
+    // block. We need to determine the reaching definition of CPSR so that
+    // it can be marked as non-pipelineable, allowing the pipeliner to force
+    // it into stage 0 or give up if it cannot or will not do so.
+    MachineInstr *CCSetter = nullptr;
+    for (auto &L : LoopBB->instrs()) {
+      if (L.isCall())
+        return nullptr;
+      if (isCPSRDefined(L))
+        CCSetter = &L;
+    }
+    if (CCSetter)
+      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
+    else
+      return nullptr; // Unable to find the CC setter, so unable to guarantee
+                      // that pipeline will work
+  }
+
+  // Recognize:
+  //   preheader:
+  //     %1 = t2DoLoopStart %0
+  //   loop:
+  //     %2 = phi %1, <not loop>, %..., %loop
+  //     %3 = t2LoopDec %2, <imm>
+  //     t2LoopEnd %3, %loop
+
+  if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
+    for (auto &L : LoopBB->instrs())
+      if (L.isCall())
+        return nullptr;
+      else if (isVCTP(&L))
+        return nullptr;
+    Register LoopDecResult = I->getOperand(0).getReg();
+    MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
+    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
+      return nullptr;
+    MachineInstr *LoopStart = nullptr;
+    for (auto &J : Preheader->instrs())
+      if (J.getOpcode() == ARM::t2DoLoopStart)
+        LoopStart = &J;
+    if (!LoopStart)
+      return nullptr;
+    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
+  }
+  return nullptr;
+}

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index defce07dd862..3b8f3403e3c3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -360,7 +360,7 @@ public:
   MachineBasicBlock::iterator
   insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator &It, MachineFunction &MF,
-                     const outliner::Candidate &C) const override;
+                     outliner::Candidate &C) const override;
 
   /// Enable outlining by default at -Oz.
   bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
@@ -372,10 +372,15 @@ public:
            MI->getOpcode() == ARM::t2WhileLoopStartTP;
   }
 
+  /// Analyze loop L, which must be a single-basic-block loop, and if the
+  /// conditions can be understood enough produce a PipelinerLoopInfo object.
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
 private:
   /// Returns an unused general-purpose register which can be used for
   /// constructing an outlined call if one exists. Returns 0 otherwise.
-  unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
 
   /// Adds an instruction which saves the link register on top of the stack into
   /// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true,
@@ -752,6 +757,26 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
   return true;
 }
 
+static inline bool isSEHInstruction(const MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case ARM::SEH_StackAlloc:
+  case ARM::SEH_SaveRegs:
+  case ARM::SEH_SaveRegs_Ret:
+  case ARM::SEH_SaveSP:
+  case ARM::SEH_SaveFRegs:
+  case ARM::SEH_SaveLR:
+  case ARM::SEH_Nop:
+  case ARM::SEH_Nop_Ret:
+  case ARM::SEH_PrologEnd:
+  case ARM::SEH_EpilogStart:
+  case ARM::SEH_EpilogEnd:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
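Editor's note: the t2LoopEnd support above threads a new three-operand Cond vector through analyzeBranch / insertBranch / reverseBranchCondition. The following is a minimal sketch, not part of this commit, of how a client pass could tell the encodings apart; the helper name classifyCond is hypothetical, and only the LLVM headers named in the includes are assumed.

    // Hypothetical helper (not in this patch): classifies the Cond vector
    // that the patched ARMBaseInstrInfo::analyzeBranch produces.
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Support/ErrorHandling.h"

    using namespace llvm;

    enum class ARMBranchKind { Unconditional, Conditional, HardwareLoopEnd };

    static ARMBranchKind classifyCond(ArrayRef<MachineOperand> Cond) {
      switch (Cond.size()) {
      case 0:
        return ARMBranchKind::Unconditional;   // no condition operands
      case 2:
        return ARMBranchKind::Conditional;     // {cond code imm, CPSR use}
      case 3:
        return ARMBranchKind::HardwareLoopEnd; // {opcode imm, counter, pad}
      default:
        llvm_unreachable("unexpected Cond encoding");
      }
    }

insertBranch mirrors this split: the three-operand form is re-materialized with BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB), and reverseBranchCondition reports failure (returns true) for it, since a t2LoopEnd terminator has no opposite condition.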
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c543d02ff75a..1d0e743b94db 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -63,28 +63,26 @@ const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
   bool UseSplitPush = STI.splitFramePushPop(*MF);
-  const MCPhysReg *RegList =
-      STI.isTargetDarwin()
-          ? CSR_iOS_SaveList
-          : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList);
-
   const Function &F = MF->getFunction();
+
   if (F.getCallingConv() == CallingConv::GHC) {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_NoRegs_SaveList;
+  } else if (STI.splitFramePointerPush(*MF)) {
+    return CSR_Win_SplitFP_SaveList;
   } else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
     return CSR_Win_AAPCS_CFGuard_Check_SaveList;
   } else if (F.getCallingConv() == CallingConv::SwiftTail) {
     return STI.isTargetDarwin()
                ? CSR_iOS_SwiftTail_SaveList
-               : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList
+               : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
                                : CSR_AAPCS_SwiftTail_SaveList);
   } else if (F.hasFnAttribute("interrupt")) {
     if (STI.isMClass()) {
       // M-class CPUs have hardware which saves the registers needed to allow a
       // function conforming to the AAPCS to function as a handler.
-      return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+      return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
     } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
       // Fast interrupt mode gives the handler a private copy of R8-R14, so less
       // need to be saved to restore user-mode state.
@@ -101,7 +99,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     if (STI.isTargetDarwin())
       return CSR_iOS_SwiftError_SaveList;
 
-    return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList :
+    return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
       CSR_AAPCS_SwiftError_SaveList;
   }
 
@@ -109,7 +107,15 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
       ? CSR_iOS_CXX_TLS_PE_SaveList
       : CSR_iOS_CXX_TLS_SaveList;
-  return RegList;
+
+  if (STI.isTargetDarwin())
+    return CSR_iOS_SaveList;
+
+  if (UseSplitPush)
+    return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+                                       : CSR_ATPCS_SplitPush_SaveList;
+
+  return CSR_AAPCS_SaveList;
 }
 
 const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
@@ -238,7 +244,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF,
   BitVector Reserved(getNumRegs());
   markSuperRegs(Reserved, ARM::PC);
-  if (TFI->hasFP(MF))
+  if (TFI->isFPReserved(MF))
     markSuperRegs(Reserved, STI.getFramePointerReg());
   if (hasBasePointer(MF))
     markSuperRegs(Reserved, BasePtr);

diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 57d7842c63ca..73ed300ccff4 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -43,7 +43,7 @@ namespace ARMRI {
 
 /// isARMArea1Register - Returns true if the register is a low register (r0-r7)
 /// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
   using namespace ARM;
 
   switch (Reg) {
@@ -53,25 +53,52 @@
     return true;
   case R8:  case R9:  case R10: case R11: case R12:
     // For iOS we want r7 and lr to be next to each other.
-    return !isIOS;
+    return !SplitFramePushPop;
   default:
     return false;
   }
 }
 
-static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
   using namespace ARM;
 
   switch (Reg) {
   case R8: case R9: case R10: case R11: case R12:
     // iOS has this second area.
-    return isIOS;
+    return SplitFramePushPop;
   default:
     return false;
   }
 }
 
-static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
+static inline bool isSplitFPArea1Register(unsigned Reg,
+                                          bool SplitFramePushPop) {
+  using namespace ARM;
+
+  switch (Reg) {
+  case R0:  case R1:  case R2:  case R3:
+  case R4:  case R5:  case R6:  case R7:
+  case R8:  case R9:  case R10: case R12:
+  case SP:  case PC:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static inline bool isSplitFPArea2Register(unsigned Reg,
+                                          bool SplitFramePushPop) {
+  using namespace ARM;
+
+  switch (Reg) {
+  case R11: case LR:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
   using namespace ARM;
 
   switch (Reg) {
@@ -214,6 +241,8 @@ public:
                         unsigned DefSubReg,
                         const TargetRegisterClass *SrcRC,
                         unsigned SrcSubReg) const override;
+
+  int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
 };
 
 } // end namespace llvm

diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index ddbd6702e528..b2d291bbe7ff 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -16,6 +16,7 @@
 #include "ARMBasicBlockInfo.h"
 #include "ARMSubtarget.h"
 #include "MVETailPredUtils.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -212,7 +213,7 @@ bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) {
 bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
-  const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
   if (!ST.hasLOB())
     return false;
   LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");

diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index a6dbe563a4ab..d14424c2deca 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -284,19 +284,32 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
 // The order of callee-saved registers needs to match the order we actually push
 // them in FrameLowering, because this order is what's used by
 // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
-// pointer, we use this AAPCS alternative.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+// pointer, we use this ATPCS alternative.
+def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
                                                R11, R10, R9, R8,
                                                (sequence "D%u", 15, 8))>;
 
+def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+                                       (sequence "D%u", 15, 8),
+                                       LR, R11)>;
+
 // R8 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
                                                      R8)>;
 
 // R10 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
                                                     R10)>;
 
+// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
+// pointer even for Thumb targets, where split pushes are necessary.
+// This AAPCS alternative makes sure the frame index slots match the push
+// order in that case.
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
+                                           R7, R6, R5, R4,
+                                           R10, R9, R8,
+                                           (sequence "D%u", 15, 8))>;
+
 // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
 // and the pointer return value are both passed in R0 in these cases, this can
 // be partially modelled by treating R0 as a callee-saved register

diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index a2a4f1f3bdfd..d77c3afd05e5 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -396,7 +396,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
                     << MCP->getConstants().size() << " CP entries, aligned to "
                     << MCP->getConstantPoolAlign().value() << " bytes *****\n");
 
-  STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+  STI = &MF->getSubtarget<ARMSubtarget>();
   TII = STI->getInstrInfo();
   isPositionIndependentOrROPI =
       STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2f083561bbd4..613904f702f0 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -2107,6 +2108,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::TCRETURNdi:
     case ARM::TCRETURNri: {
       MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+      if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+        MBBI--;
+      if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+        MBBI--;
       assert(MBBI->isReturn() &&
              "Can only insert epilog into returning blocks");
       unsigned RetOpcode = MBBI->getOpcode();
@@ -2116,13 +2121,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
 
       // Tail call return: adjust the stack pointer and jump to callee.
       MBBI = MBB.getLastNonDebugInstr();
+      if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+        MBBI--;
+      if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+        MBBI--;
       MachineOperand &JumpTarget = MBBI->getOperand(0);
 
       // Jump to label or value in register.
       if (RetOpcode == ARM::TCRETURNdi) {
+        MachineFunction *MF = MBB.getParent();
+        bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                           MF->getFunction().needsUnwindTableEntry();
         unsigned TCOpcode =
             STI->isThumb()
-                ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+                ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
+                                                         : ARM::tTAILJMPdND)
                 : ARM::TAILJMPd;
         MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
         if (JumpTarget.isGlobal())
@@ -3132,7 +3145,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
 }
 
 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
-  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  STI = &MF.getSubtarget<ARMSubtarget>();
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
   AFI = MF.getInfo<ARMFunctionInfo>();

diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 5d94b99d4c5d..a167225e2743 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -122,8 +122,7 @@ class ARMFastISel final : public FastISel {
   explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                        const TargetLibraryInfo *libInfo)
       : FastISel(funcInfo, libInfo),
-        Subtarget(
-            &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
+        Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
         M(const_cast<Module &>(*funcInfo.Fn->getParent())),
         TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
         TLI(*Subtarget->getTargetLowering()) {
@@ -156,7 +155,7 @@ class ARMFastISel final : public FastISel {
                            const LoadInst *LI) override;
   bool fastLowerArguments() override;
 
-  #include "ARMGenFastISel.inc"
+#include "ARMGenFastISel.inc"
 
   // Instruction selection routines.
@@ -189,10 +188,10 @@ class ARMFastISel final : public FastISel {
   bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                   bool isZExt);
   bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
-                   unsigned Alignment = 0, bool isZExt = true,
+                   MaybeAlign Alignment = None, bool isZExt = true,
                    bool allocReg = true);
   bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
-                    unsigned Alignment = 0);
+                    MaybeAlign Alignment = None);
   bool ARMComputeAddress(const Value *Obj, Address &Addr);
   void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
   bool ARMIsMemCpySmall(uint64_t Len);
@@ -602,8 +601,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
   }
 
   if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
-      (Subtarget->isTargetMachO() && IsIndirect) ||
-      Subtarget->genLongCalls()) {
+      (Subtarget->isTargetMachO() && IsIndirect)) {
     MachineInstrBuilder MIB;
     Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
     if (isThumb2)
@@ -898,7 +896,8 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
 }
 
 bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
-                              unsigned Alignment, bool isZExt, bool allocReg) {
+                              MaybeAlign Alignment, bool isZExt,
+                              bool allocReg) {
   unsigned Opc;
   bool useAM3 = false;
   bool needVMOV = false;
@@ -924,7 +923,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
     RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i16:
-    if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+    if (Alignment && *Alignment < Align(2) &&
+        !Subtarget->allowsUnalignedMem())
       return false;
 
     if (isThumb2) {
@@ -939,7 +939,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
     RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i32:
-    if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+    if (Alignment && *Alignment < Align(4) &&
+        !Subtarget->allowsUnalignedMem())
       return false;
 
     if (isThumb2) {
@@ -955,7 +956,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
   case MVT::f32:
     if (!Subtarget->hasVFP2Base()) return false;
     // Unaligned loads need special handling. Floats require word-alignment.
-    if (Alignment && Alignment < 4) {
+    if (Alignment && *Alignment < Align(4)) {
       needVMOV = true;
       VT = MVT::i32;
       Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
@@ -970,7 +971,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
     if (!Subtarget->hasVFP2Base()) return false;
     // FIXME: Unaligned loads need special handling. Doublewords require
     // word-alignment.
-    if (Alignment && Alignment < 4)
+    if (Alignment && *Alignment < Align(4))
       return false;
 
     Opc = ARM::VLDRD;
@@ -1030,14 +1031,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
 
   Register ResultReg;
-  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
     return false;
   updateValueMap(I, ResultReg);
   return true;
 }
 
 bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
-                               unsigned Alignment) {
+                               MaybeAlign Alignment) {
   unsigned StrOpc;
   bool useAM3 = false;
   switch (VT.SimpleTy) {
@@ -1065,7 +1066,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
     }
     break;
   case MVT::i16:
-    if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+    if (Alignment && *Alignment < Align(2) &&
+        !Subtarget->allowsUnalignedMem())
       return false;
 
     if (isThumb2) {
@@ -1079,7 +1081,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
     }
     break;
   case MVT::i32:
-    if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+    if (Alignment && *Alignment < Align(4) &&
+        !Subtarget->allowsUnalignedMem())
       return false;
 
     if (isThumb2) {
@@ -1094,7 +1097,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
   case MVT::f32:
     if (!Subtarget->hasVFP2Base()) return false;
     // Unaligned stores need special handling. Floats require word-alignment.
-    if (Alignment && Alignment < 4) {
+    if (Alignment && *Alignment < Align(4)) {
       Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                               TII.get(ARM::VMOVRS), MoveReg)
@@ -1111,8 +1114,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
     if (!Subtarget->hasVFP2Base()) return false;
     // FIXME: Unaligned stores need special handling. Doublewords require
     // word-alignment.
- if (Alignment && Alignment < 4)
- return false;
+ if (Alignment && *Alignment < Align(4))
+ return false;
 StrOpc = ARM::VSTRD;
 break;
@@ -1166,7 +1169,7 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
 if (!ARMComputeAddress(I->getOperand(1), Addr))
 return false;
- if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
 return false;
 return true;
 }
@@ -2939,7 +2942,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
 if (!ARMComputeAddress(LI->getOperand(0), Addr))
 return false;
 Register ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
 return false;
 MachineBasicBlock::iterator I(MI);
 removeDeadCode(I, std::next(I));
diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
new file mode 100644
index 000000000000..77c8f7134a55
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
@@ -0,0 +1,432 @@
+//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass works around a Cortex Core Fused AES erratum:
+// - Cortex-A57 Erratum 1742098
+// - Cortex-A72 Erratum 1655431
+//
+// The erratum may be triggered if an input vector register to AESE or AESD was
+// last written by an instruction that only updated 32 bits of it. This can
+// occur for either of the input registers.
+//
+// The workaround chosen is to update the input register using `r = VORRq r, r`,
+// as this updates all 128 bits of the register unconditionally, but does not
+// change the values observed in `r`, making the input safe.
+//
+// This pass has to be conservative in a few cases:
+// - an input vector register to the AES instruction is defined outside the
+// current function, where we have to assume the register was updated in an
+// unsafe way; and
+// - an input vector register to the AES instruction is updated along multiple
+// different control-flow paths, where we have to ensure all the register
+// updating instructions are safe.
+//
+// Both of these cases may apply to an input vector register. In either case, we
+// need to ensure that, when the pass is finished, there exists a safe
+// instruction between every unsafe register updating instruction and the AES
+// instruction.
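+//
+// [Editor's sketch, not part of the upstream patch] To make the pattern
+// concrete, assume q0 is the affected input register; the hazard and the
+// inserted workaround then look like this (illustrative assembly only):
+//
+//   vmov.32 d0[1], r2   @ writes only 32 bits of q0 -> unsafe producer
+//   vorr    q0, q0, q0  @ inserted fixup: full 128-bit write, value no-op
+//   aese.8  q0, q1      @ the erratum can no longer trigger on this input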
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" +#include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <assert.h> +#include <stdint.h> + +using namespace llvm; + +#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098" + +//===----------------------------------------------------------------------===// + +namespace { +class ARMFixCortexA57AES1742098 : public MachineFunctionPass { +public: + static char ID; + explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) { + initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "ARM fix for Cortex-A57 AES Erratum 1742098"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<ReachingDefAnalysis>(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + // This is the information needed to insert the fixup in the right place. + struct AESFixupLocation { + MachineBasicBlock *Block; + // The fixup instruction will be inserted *before* InsertionPt. 
+ MachineInstr *InsertionPt; + MachineOperand *MOp; + }; + + void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const; + + void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, + const ARMBaseRegisterInfo *TRI) const; + + static bool isFirstAESPairInstr(MachineInstr &MI); + static bool isSafeAESInput(MachineInstr &MI); +}; +char ARMFixCortexA57AES1742098::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, + false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis); +INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, false) + +//===----------------------------------------------------------------------===// + +bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + return Opc == ARM::AESD || Opc == ARM::AESE; +} + +bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) { + auto CondCodeIsAL = [](MachineInstr &MI) -> bool { + int CCIdx = MI.findFirstPredOperandIdx(); + if (CCIdx == -1) + return false; + return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL; + }; + + switch (MI.getOpcode()) { + // Unknown: Assume not safe. + default: + return false; + // 128-bit wide AES instructions + case ARM::AESD: + case ARM::AESE: + case ARM::AESMC: + case ARM::AESIMC: + // No CondCode. + return true; + // 128-bit and 64-bit wide bitwise ops (when condition = al) + case ARM::VANDd: + case ARM::VANDq: + case ARM::VORRd: + case ARM::VORRq: + case ARM::VEORd: + case ARM::VEORq: + case ARM::VMVNd: + case ARM::VMVNq: + // VMOV of 64-bit value between D registers (when condition = al) + case ARM::VMOVD: + // VMOV of 64 bit value from GPRs (when condition = al) + case ARM::VMOVDRR: + // VMOV of immediate into D or Q registers (when condition = al) + case ARM::VMOVv2i64: + case ARM::VMOVv1i64: + case ARM::VMOVv2f32: + case ARM::VMOVv4f32: + case ARM::VMOVv2i32: + case ARM::VMOVv4i32: + case ARM::VMOVv4i16: + case ARM::VMOVv8i16: + case ARM::VMOVv8i8: + case ARM::VMOVv16i8: + // Loads (when condition = al) + // VLD Dn, [Rn, #imm] + case ARM::VLDRD: + // VLDM + case ARM::VLDMDDB_UPD: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDIA: + // VLDn to all lanes. 
+ case ARM::VLD1d64: + case ARM::VLD1q64: + case ARM::VLD1d32: + case ARM::VLD1q32: + case ARM::VLD2b32: + case ARM::VLD2d32: + case ARM::VLD2q32: + case ARM::VLD1d16: + case ARM::VLD1q16: + case ARM::VLD2d16: + case ARM::VLD2q16: + case ARM::VLD1d8: + case ARM::VLD1q8: + case ARM::VLD2b8: + case ARM::VLD2d8: + case ARM::VLD2q8: + case ARM::VLD3d32: + case ARM::VLD3q32: + case ARM::VLD3d16: + case ARM::VLD3q16: + case ARM::VLD3d8: + case ARM::VLD3q8: + case ARM::VLD4d32: + case ARM::VLD4q32: + case ARM::VLD4d16: + case ARM::VLD4q16: + case ARM::VLD4d8: + case ARM::VLD4q8: + // VLD1 (single element to one lane) + case ARM::VLD1LNd32: + case ARM::VLD1LNd32_UPD: + case ARM::VLD1LNd8: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16: + case ARM::VLD1LNd16_UPD: + // VLD1 (single element to all lanes) + case ARM::VLD1DUPd32: + case ARM::VLD1DUPd32wb_fixed: + case ARM::VLD1DUPd32wb_register: + case ARM::VLD1DUPd16: + case ARM::VLD1DUPd16wb_fixed: + case ARM::VLD1DUPd16wb_register: + case ARM::VLD1DUPd8: + case ARM::VLD1DUPd8wb_fixed: + case ARM::VLD1DUPd8wb_register: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq32wb_register: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq8wb_register: + // VMOV + case ARM::VSETLNi32: + case ARM::VSETLNi16: + case ARM::VSETLNi8: + return CondCodeIsAL(MI); + }; + + return false; +} + +bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n"); + auto &STI = F.getSubtarget<ARMSubtarget>(); + + // Fix not requested or AES instructions not present: skip pass. + if (!STI.hasAES() || !STI.fixCortexA57AES1742098()) + return false; + + const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo(); + const ARMBaseInstrInfo *TII = STI.getInstrInfo(); + + auto &RDA = getAnalysis<ReachingDefAnalysis>(); + + // Analyze whole function to find instructions which need fixing up... + SmallVector<AESFixupLocation> FixupLocsForFn{}; + analyzeMF(F, RDA, TRI, FixupLocsForFn); + + // ... and fix the instructions up all at the same time. + bool Changed = false; + LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n"); + for (AESFixupLocation &FixupLoc : FixupLocsForFn) { + insertAESFixup(FixupLoc, TII, TRI); + Changed |= true; + } + + return Changed; +} + +void ARMFixCortexA57AES1742098::analyzeMF( + MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const { + unsigned MaxAllowedFixups = 0; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isFirstAESPairInstr(MI)) + continue; + + // Found an instruction to check the operands of. + LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI); + assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 && + "Unknown AES Instruction Format. Expected 1 def, 2 uses."); + + // A maximum of two fixups should be inserted for each AES pair (one per + // register use). + MaxAllowedFixups += 2; + + // Inspect all operands, choosing whether to insert a fixup. + for (MachineOperand &MOp : MI.uses()) { + SmallPtrSet<MachineInstr *, 1> AllDefs{}; + RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs); + + // Planned Fixup: This should be added to FixupLocsForFn at most once. 
+ AESFixupLocation NewLoc{&MBB, &MI, &MOp};
+
+ // In small functions with loops, this operand may be both a live-in and
+ // have definitions within the function itself. These will need a fixup.
+ bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
+
+ // If the register doesn't have defining instructions, and is not a
+ // live-in, then something is wrong and the fixup must always be
+ // inserted to be safe.
+ if (!IsLiveIn && AllDefs.size() == 0) {
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: No Defining Instrs found, not live-in: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ auto IsUnsafe = [](MachineInstr *MI) -> bool {
+ return !isSafeAESInput(*MI);
+ };
+ size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
+
+ // If there are no unsafe definitions...
+ if (UnsafeCount == 0) {
+ // ... and the register is not live-in ...
+ if (!IsLiveIn) {
+ // ... then skip the fixup.
+ LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ continue;
+ }
+
+ // Otherwise, the only unsafe "definition" is a live-in, so insert the
+ // fixup at the start of the function.
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: Live-In (with safe defining instrs): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ NewLoc.Block = &MF.front();
+ NewLoc.InsertionPt = &*NewLoc.Block->begin();
+ LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
+ << *NewLoc.InsertionPt);
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ // If a fixup is needed in more than one place, then the best place to
+ // insert it is adjacent to the use rather than introducing a fixup
+ // adjacent to each def.
+ //
+ // FIXME: It might be better to hoist this to the start of the BB, if
+ // possible.
+ if (IsLiveIn || UnsafeCount > 1) {
+ LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
+ "(including live-ins): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ assert(UnsafeCount == 1 && !IsLiveIn &&
+ "At this point, there should be one unsafe defining instr "
+ "and the defined register should not be a live-in.");
+ SmallPtrSetIterator<MachineInstr *> It =
+ llvm::find_if(AllDefs, IsUnsafe);
+ assert(It != AllDefs.end() &&
+ "UnsafeCount == 1 but no unsafe MachineInstr found.");
+ MachineInstr *DefMI = *It;
+
+ LLVM_DEBUG(
+ dbgs() << "Fixup Planned: Found single unsafe defining instr for "
+ << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
+
+ // There is one unsafe defining instruction, which needs a fixup. It is
+ // generally good to hoist the fixup to be adjacent to the defining
+ // instruction rather than the using instruction, as the using
+ // instruction may be inside a loop when the defining instruction is
+ // not.
+ MachineBasicBlock::iterator DefIt = DefMI;
+ ++DefIt;
+ if (DefIt != DefMI->getParent()->end()) {
+ LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
+ << "And immediately before " << *DefIt);
+ NewLoc.Block = DefIt->getParent();
+ NewLoc.InsertionPt = &*DefIt;
+ }
+
+ FixupLocsForFn.emplace_back(NewLoc);
+ }
+ }
+ }
+
+ assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
+ "Inserted too many fixups for this function.");
+ (void)MaxAllowedFixups;
+}
+
+void ARMFixCortexA57AES1742098::insertAESFixup(
+ AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const {
+ MachineOperand *OperandToFixup = FixupLoc.MOp;
+
+ assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
+ Register RegToFixup = OperandToFixup->getReg();
+
+ LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
+ << " before: " << *FixupLoc.InsertionPt);
+
+ // Insert the new `VORRq qN, qN, qN`. There are a few details here:
+ //
+ // The uses are marked as killed, even if the original use of OperandToFixup
+ // is not killed, as the new instruction is clobbering the register. This is
+ // safe even if there are other uses of `qN`, as the VORRq is value-wise a
+ // no-op (it is inserted for microarchitectural reasons).
+ //
+ // The def and the uses are still marked as Renamable if the original register
+ // was, to avoid having to rummage through all the other uses and defs and
+ // unset their renamable bits.
+ unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
+ BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(RegToFixup, RegState::Define | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addImm((uint64_t)ARMCC::AL)
+ .addReg(ARM::NoRegister);
+}
+
+// Factory function used by ARMTargetMachine to add the pass to
+// the passmanager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
+ return new ARMFixCortexA57AES1742098();
+}
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 1f2f6f7497e0..48b4d266b41a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,8 @@
 // | |
 // |-----------------------------------|
 // | |
-// | prev_fp, prev_lr |
+// | prev_lr |
+// | prev_fp |
 // | (a.k.a. "frame record") |
 // | |
 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
@@ -138,6 +139,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -210,6 +212,12 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
 MFI.isFrameAddressTaken());
 }
 
+/// isFPReserved - Return true if the frame pointer register should be
+/// considered a reserved register in the scope of the specified function.
+bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
+ return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
+}
+
 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
 /// not required, we reserve argument space for call sites in the function
 /// immediately on entry to the current function.
This eliminates the need for @@ -272,6 +280,230 @@ static int getArgumentStackToRestore(MachineFunction &MF, return ArgumentPopSize; } +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. +static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, + const TargetInstrInfo &TII, + unsigned Flags) { + unsigned Opc = MBBI->getOpcode(); + MachineBasicBlock *MBB = MBBI->getParent(); + MachineFunction &MF = *MBB->getParent(); + DebugLoc DL = MBBI->getDebugLoc(); + MachineInstrBuilder MIB; + const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>(); + const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + + Flags |= MachineInstr::NoMerge; + + switch (Opc) { + default: + report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc)); + break; + case ARM::t2ADDri: // add.w r11, sp, #xx + case ARM::t2ADDri12: // add.w r11, sp, #xx + case ARM::t2MOVTi16: // movt r4, #xx + case ARM::tBL: // bl __chkstk + // These are harmless if used for just setting up a frame pointer, + // but that frame pointer can't be relied upon for unwinding, unless + // set up with SEH_SaveSP. + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::t2MOVi16: { // mov(w) r4, #xx + bool Wide = MBBI->getOperand(1).getImm() >= 256; + if (!Wide) { + MachineInstrBuilder NewInstr = + BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags()); + NewInstr.add(MBBI->getOperand(0)); + NewInstr.add(t1CondCodeOp(/*isDead=*/true)); + for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) + NewInstr.add(MBBI->getOperand(i)); + MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr); + MBB->erase(MBBI); + MBBI = NewMBBI; + } + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags); + break; + } + + case ARM::tBLXr: // blx r12 (__chkstk) + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + + case ARM::t2MOVi32imm: // movw+movt + // This pseudo instruction expands into two mov instructions. If the + // second operand is a symbol reference, this will stay as two wide + // instructions, movw+movt. If they're immediates, the first one can + // end up as a narrow mov though. + // As two SEH instructions are appended here, they won't get interleaved + // between the two final movw/movt instructions, but it doesn't make any + // practical difference. 
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + MBB->insertAfter(MBBI, MIB); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA_UPD: + case ARM::t2STMDB_UPD: { + unsigned Mask = 0; + bool Wide = false; + for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MBBI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = RegInfo->getSEHRegNum(MO.getReg()); + if (Reg == 15) + Reg = 14; + if (Reg >= 8 && Reg <= 13) + Wide = true; + else if (Opc == ARM::t2LDMIA_UPD && Reg == 14) + Wide = true; + Mask |= 1 << Reg; + } + if (!Wide) { + unsigned NewOpc; + switch (Opc) { + case ARM::t2LDMIA_RET: + NewOpc = ARM::tPOP_RET; + break; + case ARM::t2LDMIA_UPD: + NewOpc = ARM::tPOP; + break; + case ARM::t2STMDB_UPD: + NewOpc = ARM::tPUSH; + break; + default: + llvm_unreachable(""); + } + MachineInstrBuilder NewInstr = + BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags()); + for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) + NewInstr.add(MBBI->getOperand(i)); + MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr); + MBB->erase(MBBI); + MBBI = NewMBBI; + } + unsigned SEHOpc = + (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs; + MIB = BuildMI(MF, DL, TII.get(SEHOpc)) + .addImm(Mask) + .addImm(Wide ? 1 : 0) + .setMIFlags(Flags); + break; + } + case ARM::VSTMDDB_UPD: + case ARM::VLDMDIA_UPD: { + int First = -1, Last = 0; + for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MBBI->getOperand(i); + unsigned Reg = RegInfo->getSEHRegNum(MO.getReg()); + if (First == -1) + First = Reg; + Last = Reg; + } + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs)) + .addImm(First) + .addImm(Last) + .setMIFlags(Flags); + break; + } + case ARM::tSUBspi: + case ARM::tADDspi: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc)) + .addImm(MBBI->getOperand(2).getImm() * 4) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + case ARM::t2SUBspImm: + case ARM::t2SUBspImm12: + case ARM::t2ADDspImm: + case ARM::t2ADDspImm12: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc)) + .addImm(MBBI->getOperand(2).getImm()) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + + case ARM::tMOVr: + if (MBBI->getOperand(1).getReg() == ARM::SP && + (Flags & MachineInstr::FrameSetup)) { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP)) + .addImm(Reg) + .setMIFlags(Flags); + } else if (MBBI->getOperand(0).getReg() == ARM::SP && + (Flags & MachineInstr::FrameDestroy)) { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP)) + .addImm(Reg) + .setMIFlags(Flags); + } else { + report_fatal_error("No SEH Opcode for MOV"); + } + break; + + case ARM::tBX_RET: + case ARM::TCRETURNri: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret)) + .addImm(/*Wide=*/0) + .setMIFlags(Flags); + break; + + case ARM::TCRETURNdi: + MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret)) + .addImm(/*Wide=*/1) + .setMIFlags(Flags); + break; + } + return MBB->insertAfter(MBBI, MIB); +} + +static MachineBasicBlock::iterator +initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) { + if (MBBI == MBB.begin()) + return MachineBasicBlock::iterator(); + return std::prev(MBBI); +} + +static void 
insertSEHRange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Start,
+ const MachineBasicBlock::iterator &End,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (Start.isValid())
+ Start = std::next(Start);
+ else
+ Start = MBB.begin();
+
+ for (auto MI = Start; MI != End;) {
+ auto Next = std::next(MI);
+ // Check if this instruction already has a SEH opcode added. In that
+ // case, don't do this generic mapping.
+ if (Next != End && isSEHInstruction(*Next)) {
+ MI = std::next(Next);
+ while (MI != End && isSEHInstruction(*MI))
+ ++MI;
+ continue;
+ }
+ insertSEH(MI, TII, MIFlags);
+ MI = Next;
+ }
+}
+
 static void emitRegPlusImmediate(
 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -392,8 +624,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 const DebugLoc &DL, const unsigned Reg,
 const Align Alignment,
 const bool MustBeSingleInstruction) {
- const ARMSubtarget &AST =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
 const unsigned AlignMask = Alignment.value() - 1U;
 const unsigned NrBitsToZero = Log2(Alignment);
@@ -452,15 +683,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
 /// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
+ const MachineFunction &MF) {
 // For Thumb1, push.w isn't available, so the first push will always push
 // r7 and lr onto the stack first.
 if (AFI.isThumb1OnlyFunction())
 return -AFI.getArgRegsSaveSize() - (2 * 4);
 // This is a conservative estimate: assume the frame pointer is r7, and that
 // registers from r8 up to pc("r15") (= 8 registers) get spilled before it.
- int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
- return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
+ int MaxRegBytes = 8 * 4;
+ if (STI.splitFramePointerPush(MF)) {
+ // Here, r11 can be stored below all of r4-r15 (3 registers more than
+ // above), plus d8-d15.
+ MaxRegBytes = 11 * 4 + 8 * 8;
+ }
+ int FPCXTSaveSize =
+ (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+ return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
 }
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -482,6 +721,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 unsigned NumBytes = MFI.getStackSize();
 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
 int FPCXTSaveSize = 0;
+ bool NeedsWinCFI = needsWinCFI(MF);
 
 // Debug location must be unknown since the first debug location is used
 // to determine the end of the prologue.
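 // [Editor's note, not part of the upstream patch] A worked example of the
 // conservative bound computed by getMaxFPOffset() above: for a hypothetical
 // non-CMSE Thumb2 function using the split r11 push, with an 8-byte
 // argument-register save area, the estimate is
 //   -0 - 8 - (11 * 4 + 8 * 8) = -116 bytes,
 // i.e. the frame pointer is assumed to land no more than 116 bytes below the
 // incoming stack pointer, which the assert() mentioned above then checks.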
@@ -510,47 +750,92 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup); DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true); } - DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (!NeedsWinCFI) + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (NeedsWinCFI && MBBI != MBB.begin()) { + insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + MF.setHasWinCFI(true); + } return; } // Determine spill area sizes. - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) { + if (STI.splitFramePointerPush(MF)) { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R11: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; GPRCS2Size += 4; break; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R12: + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; + } + } + } else { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) { + GPRCS2Size += 4; + break; + } + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; } - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; - break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. - if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; } } @@ -585,15 +870,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; Align DPRAlign = DPRCSSize ? 
std::min(Align(8), Alignment) : Align(4); - unsigned DPRGapSize = - (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) % - DPRAlign.value(); + unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize; + if (!STI.splitFramePointerPush(MF)) { + DPRGapSize += GPRCS2Size; + } + DPRGapSize %= DPRAlign.value(); - unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + unsigned DPRCSOffset; + if (STI.splitFramePointerPush(MF)) { + DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; + GPRCS2Offset = DPRCSOffset - GPRCS2Size; + } else { + DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + } int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(STI, *AFI) <= FPOffset && + assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -604,7 +897,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) { GPRCS2Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } @@ -644,18 +937,37 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else NumBytes = DPRCSOffset; + if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) { + GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + bool NeedsWinCFIStackAlloc = NeedsWinCFI; + if (STI.splitFramePointerPush(MF) && HasFP) + NeedsWinCFIStackAlloc = false; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { uint32_t NumWords = NumBytes >> 2; - if (NumWords < 65536) + if (NumWords < 65536) { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) .addImm(NumWords) .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup); + } else { + // Split into two instructions here, instead of using t2MOVi32imm, + // to allow inserting accurate SEH instructions (including accurate + // instruction size for each of them). 
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords & 0xffff)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
+ .addReg(ARM::R4)
+ .addImm(NumWords >> 16)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
 switch (TM.getCodeModel()) {
 case CodeModel::Tiny:
@@ -682,12 +994,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 break;
 }
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ MachineInstrBuilder Instr, SEH;
+ Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (NeedsWinCFIStackAlloc) {
+ SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .addImm(/*Wide=*/1)
+ .setMIFlags(MachineInstr::FrameSetup);
+ MBB.insertAfter(Instr, SEH);
+ }
 NumBytes = 0;
 }
@@ -720,34 +1040,58 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 // into spill area 1, including the FP in R11. In either case, it
 // is in area one and the adjustment needs to take place just after
 // that push.
+ // FIXME: The above is not necessarily true when PACBTI is enabled.
+ // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
+ // so FP ends up in area two.
+ MachineBasicBlock::iterator AfterPush;
 if (HasFP) {
- MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ AfterPush = std::next(GPRCS1Push);
 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
- dl, TII, FramePtr, ARM::SP,
- PushSize + FramePtrOffsetInPush,
- MachineInstr::FrameSetup);
- if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true),
- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ int FPOffset = PushSize + FramePtrOffsetInPush;
+ if (STI.splitFramePointerPush(MF)) {
+ AfterPush = std::next(GPRCS2Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
 } else {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
- nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, FPOffset,
+ MachineInstr::FrameSetup);
 }
+ if (!NeedsWinCFI) {
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ 
.addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } + } + + // Emit a SEH opcode indicating the prologue end. The rest of the prologue + // instructions below don't need to be replayed to unwind the stack. + if (NeedsWinCFI && MBBI != MBB.begin()) { + MachineBasicBlock::iterator End = MBBI; + if (HasFP && STI.splitFramePointerPush(MF)) + End = AfterPush; + insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup); + BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + MF.setHasWinCFI(true); } // Now that the prologue's actual instructions are finalised, we can insert // the necessary DWARF cf instructions to describe the situation. Start by // recording where each register ended up: - if (GPRCS1Size > 0) { + if (GPRCS1Size > 0 && !NeedsWinCFI) { MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); int CFIIndex; for (const auto &Entry : CSI) { @@ -781,7 +1125,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !NeedsWinCFI) { MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { Register Reg = Entry.getReg(); @@ -807,7 +1151,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - if (DPRCSSize > 0) { + if (DPRCSSize > 0 && !NeedsWinCFI) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. MachineBasicBlock::iterator Pos = std::next(LastPush); @@ -831,7 +1175,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // throughout the process. If we have a frame pointer, it takes over the job // half-way through, so only the first few .cfi_def_cfa_offset instructions // actually get emitted. - DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); + if (!NeedsWinCFI) + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - @@ -928,7 +1273,14 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + MachineBasicBlock::iterator RangeStart; if (!AFI->hasStackFrame()) { + if (MF.hasWinCFI()) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + RangeStart = initMBBRange(MBB, MBBI); + } + if (NumBytes + IncomingArgStackToRestore != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes + IncomingArgStackToRestore, @@ -944,6 +1296,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ++MBBI; } + if (MF.hasWinCFI()) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + RangeStart = initMBBRange(MBB, MBBI); + } + // Move SP to start of FP callee save spill area. NumBytes -= (ReservedArgStack + AFI->getFPCXTSaveAreaSize() + @@ -998,6 +1356,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); // Increment past our save areas. 
+ if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF)) + MBBI++; + if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop @@ -1012,7 +1373,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); } - if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; + if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF)) + MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; if (ReservedArgStack || IncomingArgStackToRestore) { @@ -1030,6 +1392,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT)); } + + if (MF.hasWinCFI()) { + insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy); + BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); + } } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1245,7 +1613,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && + !STI.splitFramePointerPush(MF)) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -1609,12 +1978,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( .addImm(-4) .add(predOps(ARMCC::AL)); } - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + if (STI.splitFramePointerPush(MF)) { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea1Register, 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea2Register, 0, MachineInstr::FrameSetup); + } else { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + } // The code above does not insert spill code for the aligned DPRCS2 registers. // The stack realignment code will be inserted between the push instructions @@ -1642,14 +2020,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters( emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; + unsigned LdrOpc = + AFI->isThumbFunction() ? 
ARM::t2LDR_POST : ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; - emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, - NumAlignedDPRCS2Regs); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea2Register, 0); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea1Register, 0); + if (STI.splitFramePointerPush(MF)) { + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea2Register, 0); + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea1Register, 0); + } else { + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea2Register, 0); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea1Register, 0); + } return true; } @@ -1768,7 +2156,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { return; // We are planning to use NEON instructions vst1 / vld1. - if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON()) + if (!MF.getSubtarget<ARMSubtarget>().hasNEON()) return; // Don't bother if the default stack alignment is sufficiently high. @@ -1818,6 +2206,34 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { return true; } +static bool requiresAAPCSFrameRecord(const MachineFunction &MF) { + const auto &Subtarget = MF.getSubtarget<ARMSubtarget>(); + return Subtarget.createAAPCSFrameChainLeaf() || + (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls()); +} + +// Thumb1 may require a spill when storing to a frame index through FP, for +// cases where FP is a high register (R11). This scans the function for cases +// where this may happen. +static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, + const TargetFrameLowering &TFI) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumb1OnlyFunction()) + return false; + + for (const auto &MBB : MF) + for (const auto &MI : MBB) + if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi) + for (const auto &Op : MI.operands()) + if (Op.isFI()) { + Register Reg; + TFI.getFrameIndexReference(MF, Op.getIndex(), Reg); + if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP) + return true; + } + return false; +} + void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -1826,7 +2242,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. - bool CanEliminateFrame = true; + bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)); bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; @@ -2021,6 +2437,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // Functions with VLAs or extremely large call frames are rare, and // if a function is allocating more than 1KB of stack, an extra 4-byte // slot probably isn't relevant. + // + // A special case is the scenario where r11 is used as FP, where accesses + // to a frame index will require its value to be moved into a low reg. + // This is handled later on, once we are able to determine if we have any + // fp-relative accesses. 
 if (RegInfo->hasBasePointer(MF))
 EstimatedRSStackSizeLimit = (1U << 5) * 4;
 else
@@ -2049,7 +2470,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
 //
 // We could do slightly better on Thumb1; in some cases, an sp-relative
 // offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(STI, *AFI);
+ int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
 bool HasLargeArgumentList =
 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
@@ -2067,7 +2488,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
 SavedRegs.set(FramePtr);
 // If the frame pointer is required by the ABI, also spill LR so that we
 // emit a complete frame record.
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ if ((requiresAAPCSFrameRecord(MF) ||
+ MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+ !LRSpilled) {
 SavedRegs.set(ARM::LR);
 LRSpilled = true;
 NumGPRSpills++;
@@ -2149,7 +2572,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
 }
 // r7 can be used if it is not being used as the frame pointer.
- if (!HasFP) {
+ if (!HasFP || FramePtr != ARM::R7) {
 if (SavedRegs.test(ARM::R7)) {
 --RegDeficit;
 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
@@ -2270,8 +2693,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
 // to materialize a stack offset. If so, either spill one additional
 // callee-saved register or reserve a special spill slot to facilitate
 // register scavenging. Thumb1 needs a spill slot for stack pointer
- // adjustments also, even when the frame itself is small.
- if (BigFrameOffsets && !ExtraCSSpill) {
+ // adjustments and for frame index accesses when FP is a high register,
+ // even when the frame itself is small.
+ if (!ExtraCSSpill &&
+ (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
 // If any non-reserved CS register isn't spilled, just spill one or two
 // extra. That should take care of it!
 unsigned NumExtras = TargetAlign.value() / 4;
@@ -2488,6 +2913,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
 unsigned CFIIndex;
 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
 bool Thumb = ST->isThumb();
+ bool Thumb2 = ST->isThumb2();
 
 // Sadly, this currently doesn't support varargs, platforms other than
 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
@@ -2505,19 +2931,10 @@ void ARMFrameLowering::adjustForSegmentedStacks(
 ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
 DebugLoc DL;
- uint64_t StackSize = MFI.getStackSize();
-
- // Do not generate a prologue for leaf functions with a stack of size zero.
- // For non-leaf functions we have to allow for the possibility that the
- // callis to a non-split function, as in PR37807. This function could also
- // take the address of a non-split function. When the linker tries to adjust
- // its non-existent prologue, it would fail with an error. Mark the object
- // file so that such failures are not errors. See this Go language bug-report
- // https://go-review.googlesource.com/c/go/+/148819/
- if (StackSize == 0 && !MFI.hasTailCall()) {
- MF.getMMI().setHasNosplitStack(true);
+ if (!MFI.needsSplitStackProlog())
 return;
- }
+
+ uint64_t StackSize = MFI.getStackSize();
 
 // Use R4 and R5 as scratch registers.
 // We save R4 and R5 before use and restore them before leaving the function.
@@ -2570,8 +2987,9 @@ void ARMFrameLowering::adjustForSegmentedStacks(
 // Make sure the LiveIns are still sorted and unique.
MBB->sortUniqueLiveIns(); // Replace the edges to PrologueMBB by edges to the sequences - // we are about to add. - MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); + // we are about to add, but only update for immediate predecessors. + if (MBB->isSuccessor(&PrologueMBB)) + MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); } // The required stack size that is aligned to ARM constant criterion. @@ -2604,17 +3022,19 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the relevant DWARF information about the change in stack pointer as // well as where to find both r4 and r5 (the callee-save registers) - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); - BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // mov SR1, sp if (Thumb) { @@ -2630,17 +3050,46 @@ void ARMFrameLowering::adjustForSegmentedStacks( // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) - .add(condCodeOp()) - .addReg(ScratchReg1) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); + } } else if (!CompareStackPointer) { - BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1) + .addReg(ARM::SP) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } } if (Thumb && 
ST->isThumb1Only()) { @@ -2707,28 +3156,69 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Pass first argument for the __morestack by Scratch Register #0. // The amount size of stack required if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) - .add(condCodeOp()) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } } + // Pass second argument for the __morestack by Scratch Register #1. // The amount size of stack consumed to save function arguments. if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) - .add(condCodeOp()) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)); + if (ARMFI->getArgumentStackSize() < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } } // push {lr} - Save return address of this function. 
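The recurring AlignedStackSize < 256 guards above come straight from the encodings: tMOVi8/tSUBi8 (and the MOVi path as used here) only accept an 8-bit immediate. A distilled sketch of the three-way dispatch the new code performs; the names are illustrative, not from the commit:

#include <cstdint>
enum class ImmStrategy { Imm8Move, Thumb2MovImm32, LiteralPool };
// How a 32-bit constant is materialized, mirroring the hunks above.
ImmStrategy pickImmStrategy(uint32_t Imm, bool IsThumb2) {
  if (Imm < 256)
    return ImmStrategy::Imm8Move;       // tMOVi8 / MOVi: 8-bit immediate fits
  if (IsThumb2)
    return ImmStrategy::Thumb2MovImm32; // t2MOVi32imm: movw/movt pair
  return ImmStrategy::LiteralPool;      // emitLoadConstPool: ldr from pool
}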
@@ -2746,13 +3236,15 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the DWARF info about the change in stack as well as where to find the // previous link register - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Call __morestack(). if (Thumb) { @@ -2808,9 +3300,11 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Return from this function. BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL)); @@ -2832,20 +3326,22 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Tell debuggers that r4 and r5 are now the same as they were in the - // previous function, that they're the "Same Value". - CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( - nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( - nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); - BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Tell debuggers that r4 and r5 are now the same as they were in the + // previous function, that they're the "Same Value". 
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } // Organizing MBB lists PostStackMBB->addSuccessor(&PrologueMBB); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 9822e2321bb4..16f2ce6bea6f 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -46,6 +46,7 @@ public: bool enableCalleeSaveSkip(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; + bool isFPReserved(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 0d201a67af46..9b26aac6c0b7 100644 --- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -11,6 +11,8 @@ #include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 98c8133282a2..e0e4ffd90e0e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1058,15 +1058,15 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { // This case occurs only for VLD1-lane/dup and VST1-lane instructions. // The maximum alignment is equal to the memory size being referenced. - unsigned MMOAlign = MemN->getAlignment(); + llvm::Align MMOAlign = MemN->getAlign(); unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; - if (MMOAlign >= MemSize && MemSize > 1) + if (MMOAlign.value() >= MemSize && MemSize > 1) Alignment = MemSize; } else { // All other uses of addrmode6 are for intrinsics. For now just record // the raw alignment value; it will be refined later based on the legal // alignment operands for the intrinsic. - Alignment = MemN->getAlignment(); + Alignment = MemN->getAlign().value(); } Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); @@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { return false; } -/// Target-specific DAG combining for ISD::XOR. +/// Target-specific DAG combining for ISD::SUB. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X /// select_cc setgt X, -1, X, -X /// select_cc setl[te] X, 0, -X, X /// select_cc setlt X, 1, -X, X /// which represent Integer ABS into: -/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) /// ARM instruction selection detects the latter and matches it to /// ARM::ABS or ARM::t2ABS machine node. 
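// Numeric check of the identity matched below (a sketch, not part of ISel):
//   y = x >> 31 (arithmetic shift) is 0 for x >= 0 and -1 for x < 0, hence
//   (x ^ y) - y == x when y == 0, and == ~x + 1 == -x when y == -1.
//   For example, x = -5: y = -1, x ^ y = 4, 4 - (-1) = 5.
static inline int32_t absViaSraXorSub(int32_t x) {
  int32_t y = x >> 31;   // sra (X, size(X)-1)
  return (x ^ y) - y;    // sub (xor (X, Y), Y)
}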
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ - SDValue XORSrc0 = N->getOperand(0); - SDValue XORSrc1 = N->getOperand(1); + SDValue SUBSrc0 = N->getOperand(0); + SDValue SUBSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) return false; - if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) + if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) return false; - SDValue ADDSrc0 = XORSrc0.getOperand(0); - SDValue ADDSrc1 = XORSrc0.getOperand(1); - SDValue SRASrc0 = XORSrc1.getOperand(0); - SDValue SRASrc1 = XORSrc1.getOperand(1); + SDValue XORSrc0 = SUBSrc0.getOperand(0); + SDValue XORSrc1 = SUBSrc0.getOperand(1); + SDValue SRASrc0 = SUBSrc1.getOperand(0); + SDValue SRASrc1 = SUBSrc1.getOperand(1); ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != nullptr && - Size == SRAConstant->getZExtValue()) { + if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && + SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; - CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); return true; } @@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryInlineAsm(N)) return; break; - case ISD::XOR: - // Select special operations if XOR node forms integer ABS pattern + case ISD::SUB: + // Select special operations if SUB node forms integer ABS pattern if (tryABSOp(N)) return; // Other cases are autogenerated. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 1b41427a1cab..85e32c08c74c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -273,6 +273,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::USUBSAT, VT, Legal); setOperationAction(ISD::ABDS, VT, Legal); setOperationAction(ISD::ABDU, VT, Legal); + setOperationAction(ISD::AVGFLOORS, VT, Legal); + setOperationAction(ISD::AVGFLOORU, VT, Legal); + setOperationAction(ISD::AVGCEILS, VT, Legal); + setOperationAction(ISD::AVGCEILU, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); @@ -392,6 +396,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); @@ -476,7 +481,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && - !Subtarget->isTargetWatchOS()) { + !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) { bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), @@ -809,8 +814,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Combine low-overhead loop intrinsics so that we can lower i1 types. 
if (Subtarget->hasLOB()) { - setTargetDAGCombine(ISD::BRCOND); - setTargetDAGCombine(ISD::BR_CC); + setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC}); } if (Subtarget->hasNEON()) { @@ -982,13 +986,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, MVT::v4f32, Expand); } - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::FP_TO_SINT); - setTargetDAGCombine(ISD::FP_TO_UINT); - setTargetDAGCombine(ISD::FDIV); - setTargetDAGCombine(ISD::LOAD); + setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT, + ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD}); // It is legal to extload from v4i8 to v4i16 or v4i32. for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, @@ -1002,32 +1001,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { - setTargetDAGCombine(ISD::BUILD_VECTOR); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - setTargetDAGCombine(ISD::INSERT_SUBVECTOR); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::VECREDUCE_ADD); - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::BITCAST); + setTargetDAGCombine( + {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR, + ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, + ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, + ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST}); } if (Subtarget->hasMVEIntegerOps()) { - setTargetDAGCombine(ISD::SMIN); - setTargetDAGCombine(ISD::UMIN); - setTargetDAGCombine(ISD::SMAX); - setTargetDAGCombine(ISD::UMAX); - setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX, + ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC, + ISD::SETCC}); } if (Subtarget->hasMVEFloatOps()) { setTargetDAGCombine(ISD::FADD); @@ -1364,6 +1348,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } + // Compute supported atomic widths. + if (Subtarget->isTargetLinux() || + (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { + // For targets where __sync_* routines are reliably available, we use them + // if necessary. + // + // ARM Linux always supports 64-bit atomics through kernel-assisted atomic + // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? + // + // ARMv6 targets have native instructions in ARM mode. For Thumb mode, + // such targets should provide __sync_* routines, which use the ARM mode + // instructions. (ARMv6 doesn't have dmb, but it has an equivalent + // encoding; see ARMISD::MEMBARRIER_MCR.) + setMaxAtomicSizeInBitsSupported(64); + } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) { + // Cortex-M (besides Cortex-M0) have 32-bit atomics. + setMaxAtomicSizeInBitsSupported(32); + } else { + // We can't assume anything about other targets; just use libatomic + // routines. 
+ setMaxAtomicSizeInBitsSupported(0); + } + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. @@ -1545,12 +1552,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine( + {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); if (Subtarget->hasMVEIntegerOps()) setTargetDAGCombine(ISD::VSELECT); @@ -1559,6 +1562,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SRL); if (Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::SHL); + // Attempt to lower smin/smax to ssat/usat + if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || + Subtarget->isThumb2()) { + setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); + } setStackPointerRegisterToSaveRestore(ARM::SP); @@ -1901,13 +1909,14 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { // source/dest is aligned and the copy size is large enough. We therefore want // to align such objects passed to memory intrinsics. bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, - unsigned &PrefAlign) const { + Align &PrefAlign) const { if (!isa<MemIntrinsic>(CI)) return false; MinSize = 8; // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 // cycle faster than 4-byte aligned LDM. - PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); + PrefAlign = + (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4)); return true; } @@ -2326,7 +2335,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Lower 'returns_twice' calls to a pseudo-instruction. if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && - !Subtarget->getNoBTIAtReturnTwice()) + !Subtarget->noBTIAtReturnTwice()) GuardWithBTI = AFI->branchTargetEnforcement(); // Determine whether this is a non-secure function call. @@ -2778,25 +2787,23 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
- if (!isTailCall) { - const uint32_t *Mask; - const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); - if (isThisReturn) { - // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(MF, CallConv); - if (!Mask) { - // Set isThisReturn to false if the calling convention is not one that - // allows 'returned' to be modeled in this way, so LowerCallResult does - // not try to pass 'this' straight through - isThisReturn = false; - Mask = ARI->getCallPreservedMask(MF, CallConv); - } - } else + const uint32_t *Mask; + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(MF, CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); + } + } else + Mask = ARI->getCallPreservedMask(MF, CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - } + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -4379,7 +4386,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, bool ARMTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { @@ -4397,7 +4404,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts( SDValue ARMTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.hasValue(); + bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { unsigned ValueBits = ValueVT.getSizeInBits(); @@ -5547,7 +5554,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getPointerInfo(), Ld->getAlign(), Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); @@ -5567,14 +5574,14 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - Ld->getAlignment(), Ld->getMemOperand()->getFlags()); + Ld->getAlign(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); - unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, - Ld->getPointerInfo().getWithOffset(4), NewAlign, + Ld->getPointerInfo().getWithOffset(4), + commonAlignment(Ld->getAlign(), 4), Ld->getMemOperand()->getFlags()); return; } @@ -5801,8 +5808,7 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 
return DAG.UnrollVectorOp(Op.getNode()); } - const bool HasFullFP16 = - static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); EVT NewTy; const EVT OpTy = Op.getOperand(0).getValueType(); @@ -5912,8 +5918,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!"); - const bool HasFullFP16 = - static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); + const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); EVT DestVecType; if (VT == MVT::v4f32) @@ -9359,15 +9364,15 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), - LD->getBasePtr(), LD->getPointerInfo(), - LD->getAlignment(), LD->getMemOperand()->getFlags()); + LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(), + LD->getMemOperand()->getFlags()); // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), + LD->getMemoryVT(), LD->getAlign(), LD->getMemOperand()->getFlags()); } @@ -9876,7 +9881,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (N->getOpcode() != ISD::SDIV) return SDValue(); - const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget()); + const auto &ST = DAG.getSubtarget<ARMSubtarget>(); const bool MinSize = ST.hasMinSize(); const bool HasDivide = ST.isThumb() ? 
ST.hasDivideInThumbMode() : ST.hasDivideInARMMode(); @@ -10311,6 +10316,15 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Result, Chain}, dl); } +SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + + EVT VT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(Op); + int FI = MFI.CreateFixedObject(4, 0, false); + return DAG.getFrameIndex(FI, VT); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -10424,6 +10438,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG); + case ISD::SPONENTRY: + return LowerSPONENTRY(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } @@ -10509,9 +10525,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); - case ISD::ABS: - lowerABS(N, Results, DAG); - return ; case ISD::LOAD: LowerLOAD(N, Results, DAG); break; @@ -12170,7 +12183,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, if (Subtarget->isThumb1Only()) { for (unsigned c = MCID->getNumOperands() - 4; c--;) { MI.addOperand(MI.getOperand(1)); - MI.RemoveOperand(1); + MI.removeOperand(1); } // Restore the ties @@ -12208,7 +12221,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, definesCPSR = true; if (MO.isDead()) deadCPSR = true; - MI.RemoveOperand(i); + MI.removeOperand(i); break; } } @@ -14775,14 +14788,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), - LD->getAlignment(), LD->getMemOperand()->getFlags()); + LD->getAlign(), LD->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr, LD->getPointerInfo().getWithOffset(4), - std::min(4U, LD->getAlignment()), + commonAlignment(LD->getAlign(), 4), LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); @@ -15352,6 +15365,10 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) { case ISD::MULHU: case ISD::ABDS: case ISD::ABDU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: break; default: return SDValue(); @@ -15721,7 +15738,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // Now, create a _UPD node, taking care of not breaking alignment. EVT AlignedVecTy = VecTy; - unsigned Alignment = MemN->getAlignment(); + Align Alignment = MemN->getAlign(); // If this is a less-than-standard-aligned load/store, change the type to // match the standard alignment. @@ -15738,10 +15755,8 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // memory type to match the explicit alignment. That way, we don't // generate non-standard-aligned ARMISD::VLDx nodes. 
if (isa<LSBaseSDNode>(N)) { - if (Alignment == 0) - Alignment = 1; - if (Alignment < VecTy.getScalarSizeInBits() / 8) { - MVT EltTy = MVT::getIntegerVT(Alignment * 8); + if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) { + MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8); assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); assert(!isLaneOp && "Unexpected generic load/store lane."); unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); @@ -15754,7 +15769,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, // alignment of the memory type. // Intrinsics, however, always get an explicit alignment, set to the // alignment of the MMO. - Alignment = 1; + Alignment = Align(1); } // Create the new updating load/store node. @@ -15787,7 +15802,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, } // For all node types, the alignment operand is always the last one. - Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32)); + Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32)); // If this is a non-standard-aligned STORE, the penultimate operand is the // stored value. Bitcast it to the aligned type. @@ -15965,10 +15980,10 @@ static SDValue CombineBaseUpdate(SDNode *N, // Try to fold with other users. Non-constant updates are considered // first, and constant updates are sorted to not break a sequence of // strided accesses (if there is any). - std::sort(BaseUpdates.begin(), BaseUpdates.end(), - [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) { - return LHS.ConstInc < RHS.ConstInc; - }); + std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(), + [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) { + return LHS.ConstInc < RHS.ConstInc; + }); for (BaseUpdateUser &User : BaseUpdates) { if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI)) return SDValue(); @@ -16258,7 +16273,7 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, if (LD && Op.hasOneUse() && LD->isUnindexed() && LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1), - DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32)}; + DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)}; SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops, @@ -16360,7 +16375,7 @@ static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, ShuffWide, DAG.getIntPtrConstant(I, DL)); SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), - St->getAlignment(), St->getMemOperand()->getFlags()); + St->getAlign(), St->getMemOperand()->getFlags()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); Chains.push_back(Ch); @@ -16608,7 +16623,7 @@ static SDValue PerformSTORECombine(SDNode *N, DCI.AddToWorklist(ExtElt.getNode()); DCI.AddToWorklist(V.getNode()); return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), + St->getPointerInfo(), St->getAlign(), St->getMemOperand()->getFlags(), St->getAAInfo()); } @@ -16690,14 +16705,16 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDLoc DL(N); - // The identity element for a fadd is -0.0, which these VMOV's represent. - auto isNegativeZeroSplat = [&](SDValue Op) { + // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set, + // which these VMOV's represent. 
+ auto isIdentitySplat = [&](SDValue Op, bool NSZ) { if (Op.getOpcode() != ISD::BITCAST || Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM) return false; - if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664) + uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0); + if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ))) return true; - if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688) + if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ))) return true; return false; }; @@ -16705,12 +16722,17 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT) std::swap(Op0, Op1); - if (Op1.getOpcode() != ISD::VSELECT || - !isNegativeZeroSplat(Op1.getOperand(2))) + if (Op1.getOpcode() != ISD::VSELECT) + return SDValue(); + + SDNodeFlags FaddFlags = N->getFlags(); + bool NSZ = FaddFlags.hasNoSignedZeros(); + if (!isIdentitySplat(Op1.getOperand(2), NSZ)) return SDValue(); + SDValue FAdd = - DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags()); - return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0); + DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags); + return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags); } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -17060,13 +17082,10 @@ static SDValue PerformVMOVNCombine(SDNode *N, IsTop ? Op1DemandedElts : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1)); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI)) return SDValue(N, 0); - if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); @@ -17082,10 +17101,8 @@ static SDValue PerformVQMOVNCombine(SDNode *N, APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1) : APInt::getHighBitsSet(2, 1)); - APInt KnownUndef, KnownZero; const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef, - KnownZero, DCI)) + if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI)) return SDValue(N, 0); return SDValue(); } @@ -17390,7 +17407,7 @@ static SDValue PerformShiftCombine(SDNode *N, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); - if (ST->hasMVEIntegerOps() && VT == MVT::v2i64) + if (ST->hasMVEIntegerOps()) return SDValue(); int64_t Cnt; @@ -17556,12 +17573,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating +// constant bounds. 
+static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) && + !Subtarget->isThumb2()) + return SDValue(); + + EVT VT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + + if (VT != MVT::i32 || + (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) || + !isa<ConstantSDNode>(Op.getOperand(1)) || + !isa<ConstantSDNode>(Op0.getOperand(1))) + return SDValue(); + + SDValue Min = Op; + SDValue Max = Op0; + SDValue Input = Op0.getOperand(0); + if (Min.getOpcode() == ISD::SMAX) + std::swap(Min, Max); + + APInt MinC = Min.getConstantOperandAPInt(1); + APInt MaxC = Max.getConstantOperandAPInt(1); + + if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX || + !(MinC + 1).isPowerOf2()) + return SDValue(); + + SDLoc DL(Op); + if (MinC == ~MaxC) + return DAG.getNode(ARMISD::SSAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + if (MaxC == 0) + return DAG.getNode(ARMISD::USAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + + return SDValue(); +} + /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating /// saturates. static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); + + if (VT == MVT::i32) + return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST); + if (!ST->hasMVEIntegerOps()) return SDValue(); @@ -19354,8 +19416,8 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Return false to prevent folding // (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine, // if the folding leads to worse code. -bool ARMTargetLowering::isMulAddWithConstProfitable( - const SDValue &AddNode, const SDValue &ConstNode) const { +bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const { // Let the DAGCombiner decide for vector types and large types. const EVT VT = AddNode.getValueType(); if (VT.isVector() || VT.getScalarSizeInBits() > 32) @@ -20537,38 +20599,6 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result; } -void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { - assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; - SDLoc dl(N); - SDValue Hi, Lo, Tmp; - - if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) || - !isOperationLegalOrCustom(ISD::UADDO, HalfT)) - return ; - - unsigned OpTypeBits = HalfT.getScalarSizeInBits(); - SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); - - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(0, dl, HalfT)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(1, dl, HalfT)); - - Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi, - DAG.getConstant(OpTypeBits - 1, dl, - getShiftAmountTy(HalfT, DAG.getDataLayout()))); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, - SDValue(Lo.getNode(), 1)); - Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); - Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - - Results.push_back(Lo); - Results.push_back(Hi); -} - bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. 
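To make the new combine concrete: clamping to [-128, 127] gives MinC = 127 and MaxC = -128, so MinC + 1 is a power of two and MinC == ~MaxC, selecting ARMISD::SSAT; clamping to [0, 255] gives MaxC == 0 and selects ARMISD::USAT. A scalar sketch of source that produces exactly this smin/smax shape:

#include <algorithm>
#include <cstdint>
int32_t clampS8(int32_t x) {   // smin(smax(x, -128), 127) -> ssat
  return std::min<int32_t>(std::max<int32_t>(x, -128), 127);
}
int32_t clampU8(int32_t x) {   // smin(smax(x, 0), 255)    -> usat
  return std::min<int32_t>(std::max<int32_t>(x, 0), 255);
}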
@@ -20787,24 +20817,24 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); + Type *ValTy = I.getParamElementType(0); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); + Type *ValTy = I.getParamElementType(1); Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getPointerElementType()); + Info.memVT = MVT::getVT(ValTy); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType()); + Info.align = DL.getABITypeAlign(ValTy); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -20932,9 +20962,19 @@ Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder, // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. -bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { +TargetLoweringBase::AtomicExpansionKind +ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + bool has64BitAtomicStore; + if (Subtarget->isMClass()) + has64BitAtomicStore = false; + else if (Subtarget->isThumb()) + has64BitAtomicStore = Subtarget->hasV7Ops(); + else + has64BitAtomicStore = Subtarget->hasV6Ops(); + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return (Size == 64) && !Subtarget->isMClass(); + return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand + : AtomicExpansionKind::None; } // Loads and stores less than 64-bits are already atomic; ones above that @@ -20946,9 +20986,17 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // sections A8.8.72-74 LDRD) TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + bool has64BitAtomicLoad; + if (Subtarget->isMClass()) + has64BitAtomicLoad = false; + else if (Subtarget->isThumb()) + has64BitAtomicLoad = Subtarget->hasV7Ops(); + else + has64BitAtomicLoad = Subtarget->hasV6Ops(); + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly - : AtomicExpansionKind::None; + return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, @@ -20958,19 +21006,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { if (AI->isFloatingPointOperation()) return AtomicExpansionKind::CmpXChg; - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. 
If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); - return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) - ? AtomicExpansionKind::LLSC - : AtomicExpansionKind::None; + bool hasAtomicRMW; + if (Subtarget->isMClass()) + hasAtomicRMW = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + hasAtomicRMW = Subtarget->hasV7Ops(); + else + hasAtomicRMW = Subtarget->hasV6Ops(); + if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) { + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on + // the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; + } + return AtomicExpansionKind::None; } // Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 @@ -20983,8 +21037,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); - bool HasAtomicCmpXchg = - !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + bool HasAtomicCmpXchg; + if (Subtarget->isMClass()) + HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps(); + else if (Subtarget->isThumb()) + HasAtomicCmpXchg = Subtarget->hasV7Ops(); + else + HasAtomicCmpXchg = Subtarget->hasV6Ops(); if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U)) return AtomicExpansionKind::LLSC; @@ -21099,8 +21158,11 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); + CallInst *CI = Builder.CreateCall(Ldrex, Addr); - return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValueTy); + CI->addParamAttr( + 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy)); + return Builder.CreateTruncOrBitCast(CI, ValueTy); } void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( @@ -21138,10 +21200,13 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder, Type *Tys[] = { Addr->getType() }; Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); - return Builder.CreateCall( + CallInst *CI = Builder.CreateCall( Strex, {Builder.CreateZExtOrBitCast( Val, Strex->getFunctionType()->getParamType(0)), Addr}); + CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType, + Val->getType())); + return CI; } @@ -21273,7 +21338,7 @@ bool ARMTargetLowering::lowerInterleavedLoad( SmallVector<Value *, 2> Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); - Ops.push_back(Builder.getInt32(LI->getAlignment())); + Ops.push_back(Builder.getInt32(LI->getAlign().value())); return Builder.CreateCall(VldnFunc, Ops, "vldN"); } else { @@ -21443,7 +21508,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SmallVector<Value *, 6> Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); append_range(Ops, Shuffles); - Ops.push_back(Builder.getInt32(SI->getAlignment())); + Ops.push_back(Builder.getInt32(SI->getAlign().value())); Builder.CreateCall(VstNFunc, Ops); } else { assert((Factor == 2 || Factor == 4) && diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 1c5f8389f57c..10f60ab93ae3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -581,7 +581,7 @@ class VectorType; getRegClassFor(MVT VT, bool isDivergent = false) const override; bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, - unsigned &PrefAlign) const override; + Align &PrefAlign) const override; /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
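Pulling the atomic changes together: with the supported widths and expansion kinds set above, a 32-bit read-modify-write on, for example, an ARMv7 target is expanded inline to a ldrex/strex loop (now carrying the elementtype attribute on the intrinsic calls) rather than a __atomic_* libcall. A usage sketch, with the target assumption noted in the comment:

#include <atomic>
// Assuming an armv7/thumbv7 (or v8-M Baseline) target, this expands to
// roughly: dmb ish; retry: ldrex; add; strex; cmp; bne retry; dmb ish.
// At -O0 the same operation is instead lowered through a cmpxchg loop.
int fetchAddOne(std::atomic<int> &Counter) {
  return Counter.fetch_add(1, std::memory_order_seq_cst);
}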
@@ -665,7 +665,8 @@ class VectorType; bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; TargetLoweringBase::AtomicExpansionKind @@ -713,8 +714,8 @@ class VectorType; Align Alignment, const DataLayout &DL) const; - bool isMulAddWithConstProfitable(const SDValue &AddNode, - const SDValue &ConstNode) const override; + bool isMulAddWithConstProfitable(SDValue AddNode, + SDValue ConstNode) const override; bool alignLoopsWithOptSize() const override; @@ -845,8 +846,7 @@ class VectorType; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const; - void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; + SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index ff5afd787c82..c9a2d21bec53 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1589,9 +1589,9 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz, } class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string asm, string cstr, list<dag> pattern> : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, - opc, asm, "", pattern> { + opc, asm, cstr, pattern> { let PostEncoderMethod = "VFPThumb2PostEncoder"; } @@ -1751,8 +1751,8 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, // Double precision, unary class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { + string asm, string cstr, list<dag> pattern> + : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> { // Instruction operands. bits<5> Dd; bits<5> Dm; @@ -1804,7 +1804,7 @@ class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> { // Instruction operands. bits<5> Dd; bits<5> Dn; @@ -1862,8 +1862,8 @@ class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, // Single precision, unary, predicated class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { + string asm, string cstr, list<dag> pattern> + : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> { // Instruction operands. 
bits<5> Sd; bits<5> Sm; @@ -1916,14 +1916,14 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm, - pattern> { + "", pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } // Single precision, binary class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> { // Instruction operands. bits<5> Sd; bits<5> Sn; @@ -2000,7 +2000,7 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { + : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, "", pattern> { list<Predicate> Predicates = [HasFullFP16]; // Instruction operands. @@ -2056,7 +2056,7 @@ class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, // Half precision, binary class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> { list<Predicate> Predicates = [HasFullFP16]; // Instruction operands. @@ -2116,7 +2116,7 @@ class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> { + : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, "", pattern> { let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{19-16} = opcod3; @@ -2149,7 +2149,7 @@ class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPAI<oops, iops, f, itin, opc, asm, pattern> { + : VFPAI<oops, iops, f, itin, opc, asm, "", pattern> { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{4} = 1; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 32a3911d3369..88bb74d1fc54 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -5129,6 +5129,7 @@ let hasNoSchedulingInfo = 1 in def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary, "tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> { let Inst{31-0} = 0xe320f012; + let DecoderMethod = "DecodeTSBInstruction"; } } @@ -6387,7 +6388,7 @@ def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; // Pre-v6, 'mov r0, r0' was used as a NOP encoding. -def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, +def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg), 0>, Requires<[IsARM, NoV6]>; // MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but @@ -6415,8 +6416,7 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // 'it' blocks in ARM mode just validate the predicates. 
The IT itself // is discarded. -def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, - ComplexDeprecationPredicate<"IT">; +def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), @@ -6476,3 +6476,24 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, let AsmString = "@ COMPILER BARRIER"; let hasNoSchedulingInfo = 1; } + +//===----------------------------------------------------------------------===// +// Instructions used for emitting unwind opcodes on Windows. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in { + def SEH_StackAlloc : PseudoInst<(outs), (ins i32imm:$size, i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_SaveRegs : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_SaveRegs_Ret : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_SaveSP : PseudoInst<(outs), (ins i32imm:$reg), NoItinerary, []>, Sched<[]>; + def SEH_SaveFRegs : PseudoInst<(outs), (ins i32imm:$first, i32imm:$last), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_SaveLR : PseudoInst<(outs), (ins i32imm:$offst), NoItinerary, []>, Sched<[]>; + def SEH_Nop : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_Nop_Ret : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>; + def SEH_PrologEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + def SEH_EpilogStart : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + let isTerminator = 1 in + def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; +} diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1ae0354ffc37..15c33014e988 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2192,36 +2192,29 @@ def subnsw : PatFrag<(ops node:$lhs, node:$rhs), return N->getFlags().hasNoSignedWrap(); }]>; -multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI, - SDNode unpred_op, Intrinsic pred_int> { +multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI, SDNode Op, + SDNode unpred_op, Intrinsic PredInt> { def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? 
(i32 VTI.Unsigned)), !cast<Instruction>(NAME)>; let Predicates = [HasMVEInt] in { - // Unpredicated rounding add-with-divide-by-two + // Unpredicated rounding add-with-divide-by-two intrinsic def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))), (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; - - // Predicated add-with-divide-by-two - def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg, - (VTI.Vec MQPR:$inactive)))>; } } -multiclass MVE_VRHADD<MVEVectorVTInfo VTI> - : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>; +multiclass MVE_VRHADD<MVEVectorVTInfo VTI, SDNode rhadd> + : MVE_VRHADD_m<VTI, rhadd, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>; -defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>; -defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>; -defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>; -defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>; -defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>; -defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>; +defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8, avgceils>; +defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16, avgceils>; +defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32, avgceils>; +defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8, avgceilu>; +defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16, avgceilu>; +defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32, avgceilu>; // Rounding Halving Add performs the arithmetic operation with an extra bit of // precision, before performing the shift, to avoid clipping errors. We're not @@ -2303,11 +2296,12 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size, list<dag> pattern=[]> : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>; -multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, - SDNode unpred_op, Intrinsic pred_int, PatFrag add_op, +multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op, + SDNode unpred_op, Intrinsic PredInt, PatFrag add_op, SDNode shift_op> { def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>; let Predicates = [HasMVEInt] in { // Unpredicated add-and-divide-by-two @@ -2316,30 +2310,23 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))), (Inst MQPR:$Qm, MQPR:$Qn)>; - - // Predicated add-and-divide-by-two - def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned), - (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg, - (VTI.Vec MQPR:$inactive)))>; } } -multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> - : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op, +multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op, PatFrag add_op, SDNode shift_op> + : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op, shift_op>; // Halving add/sub perform the arithmetic operation with an extra bit of // precision, before performing the shift, to avoid clipping errors. We're not // modelling that here with these patterns, but we're using no wrap forms of // add/sub to ensure that the extra bit of information is not needed.
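// Per-lane model of the ISD nodes being wired up here (a sketch in scalar
// C++, not committed code): vhadd is the flooring average (avgfloors/
// avgflooru) and vrhadd the rounding one (avgceils/avgceilu); the
// intermediate add is one bit wider, so it cannot clip, which is what the
// no-wrap add + shift patterns rely on.
#include <cstdint>
static inline int8_t vhaddModelS8(int8_t a, int8_t b) {
  return static_cast<int8_t>((static_cast<int16_t>(a) + b) >> 1);     // floor
}
static inline int8_t vrhaddModelS8(int8_t a, int8_t b) {
  return static_cast<int8_t>((static_cast<int16_t>(a) + b + 1) >> 1); // round
}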
-defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>; -defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>; -defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>; -defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>; -defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>; -defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>; +defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, avgfloors, addnsw, ARMvshrsImm>; +defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors, addnsw, ARMvshrsImm>; +defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors, addnsw, ARMvshrsImm>; +defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, avgflooru, addnuw, ARMvshruImm>; +defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru, addnuw, ARMvshruImm>; +defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru, addnuw, ARMvshruImm>; multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op, @@ -5372,10 +5359,10 @@ class MVE_VxADDSUB_qr<string iname, string suffix, let validForTailPredication = 1; } -multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, - Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op, - SDNode shift_op> { +multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, SDNode Op, + Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op, PatFrag shift_op> { def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>; + defm : MVE_TwoOpPatternDup<VTI, Op, pred_int, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>; defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, unpred_int, pred_int, 1, 1>; defvar Inst = !cast<Instruction>(NAME); @@ -5386,20 +5373,20 @@ multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, } } -multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> : - MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, - add_op, shift_op>; +multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op, SDNode Op> : + MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, Op, int_arm_mve_vhadd, + int_arm_mve_hadd_predicated, add_op, shift_op>; multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> : - MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub, int_arm_mve_hsub_predicated, - add_op, shift_op>; - -defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm>; -defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm>; -defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm>; -defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm>; -defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm>; -defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm>; + MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, null_frag, int_arm_mve_vhsub, + int_arm_mve_hsub_predicated, add_op, shift_op>; + +defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm, avgfloors>; +defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm, avgfloors>; +defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm, avgfloors>; +defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm, avgflooru>; +defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm, avgflooru>; +defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm, avgflooru>; defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8, subnsw, ARMvshrsImm>; defm MVE_VHSUB_qr_s16 : 
MVE_VHSUB_qr_m<MVE_v8s16, subnsw, ARMvshrsImm>; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 357aa6d062e9..cdad8e106de6 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, v4f32, v4i16, int_arm_neon_vcvthf2fp>, Requires<[HasNEON, HasFP16]>; +def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>; +def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>; + // Vector Reverse. // VREV64 : Vector Reverse elements within 64-bit doublewords diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index f80b9a5053f7..20d8a45aaf49 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3561,6 +3561,7 @@ let hasNoSchedulingInfo = 1 in def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary, "tsb", "\t$opt", []>, Requires<[IsThumb, HasV8_4a]> { let Inst{31-0} = 0xf3af8012; + let DecoderMethod = "DecodeTSBInstruction"; } } @@ -3950,6 +3951,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // Tail calls. The MachO version of thumb tail calls uses a t2 branch, so // it goes here. +// Windows SEH unwinding also needs a strict t2 branch for tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. let Uses = [SP] in @@ -3957,15 +3959,14 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins thumb_br_target:$dst, pred:$p), 4, IIC_Br, [], (t2B thumb_br_target:$dst, pred:$p)>, - Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>; + Requires<[IsThumb2]>, Sched<[WriteBr]>; } // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, 2, IIC_iALUx, - "it$mask\t$cc", "", []>, - ComplexDeprecationPredicate<"IT"> { + "it$mask\t$cc", "", []> { // 16-bit instruction. let Inst{31-16} = 0x0000; let Inst{15-8} = 0b10111111; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index dc5f1b92a6c2..b233555d5225 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -584,12 +584,12 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "", [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "", [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -603,12 +603,12 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "", [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), - IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
@@ -627,7 +627,7 @@ def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>; def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, @@ -647,7 +647,7 @@ def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "", [(arm_cmpfpe0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -655,7 +655,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "", [(arm_cmpfpe0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -675,7 +675,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), - IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", + IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "", [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -683,7 +683,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), - IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", + IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "", [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -704,7 +704,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", + IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "", [(set DPR:$Dd, (fpextend SPR:$Sm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -723,7 +723,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, - IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", + IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "", [(set SPR:$Sd, (fpround DPR:$Dm))]>, Sched<[WriteFPCVT]> { // Instruction operands. @@ -749,7 +749,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half, single and double-precision. 
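In the hunks below, the conversions that narrow to f16 (VCVTBSH, VCVTTSH, and later VCVTBDH, VCVTTDH) gain a tied source operand, "$Sd = $Sda": they write only one 16-bit half of the 32-bit S register, so the untouched half is genuinely an input, which the IMPLICIT_DEF and EXTRACT_SUBREG operands in the rewritten patterns supply. A scalar sketch of that register update (plain C++, illustrative only, not part of the patch):

#include <cstdint>

// VCVTB-style write: replace the bottom 16 bits of the register, preserving
// the top half of the previous value (sda).
uint32_t vcvtb_insert(uint32_t sda, uint16_t f16_bits) {
  return (sda & 0xFFFF0000u) | f16_bits;
}

// VCVTT-style write: replace the top 16 bits, preserving the bottom half.
uint32_t vcvtt_insert(uint32_t sda, uint16_t f16_bits) {
  return (sda & 0x0000FFFFu) | (static_cast<uint32_t>(f16_bits) << 16);
}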
let hasSideEffects = 0 in def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -760,26 +760,30 @@ def : FP16Pat<(f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; let hasSideEffects = 0 in -def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(f16 (fpround SPR:$Sm)), - (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>; + (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>; def : FP16Pat<(fp_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; let hasSideEffects = 0 in def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; @@ -792,22 +796,26 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))), (SSubReg_f16_reg imm_odd:$lane)))>; let hasSideEffects = 0 in -def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFP16]>, Sched<[WriteFPCVT]>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2), + (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane), - (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2), + (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), + (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)), + SPR:$src2), (SSubReg_f16_reg imm:$lane)))>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "", [/* Intentionally left blank, see patterns 
below */]>, Requires<[HasFPARMv8, HasDPVFP]>, Sched<[WriteFPCVT]> { @@ -829,8 +837,8 @@ def : FP16Pat<(f64 (f16_to_fp GPR:$a)), Requires<[HasFPARMv8, HasDPVFP]>; def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", [/* Intentionally left blank, see patterns below */]>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. @@ -847,15 +855,15 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, } def : FullFP16Pat<(f16 (fpround DPR:$Dm)), - (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>, + (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>, Requires<[HasFPARMv8, HasDPVFP]>; def : FP16Pat<(fp_to_f16 (f64 DPR:$a)), - (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>, + (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>, Requires<[HasFPARMv8, HasDPVFP]>; def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), - NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. bits<5> Sm; @@ -868,8 +876,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, } def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, - (outs SPR:$Sd), (ins DPR:$Dm), - NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda", []>, Requires<[HasFPARMv8, HasDPVFP]> { // Instruction operands. bits<5> Sd; @@ -990,7 +998,7 @@ defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", + IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>; def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, @@ -1019,7 +1027,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "", [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, Requires<[HasFPARMv8]> { let Inst{7} = op2; @@ -1027,7 +1035,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "", [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; @@ -1094,13 +1102,13 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", + IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "", [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", + IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, Sched<[WriteFPSQRT32]>; @@ -1113,12 +1121,12 @@ let hasSideEffects = 0 in { let isMoveReg = 1 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>, + IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>, Requires<[HasFPRegs64]>; def VMOVS : ASuI<0b11101, 
0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>, + IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>, Requires<[HasFPRegs]>; } // isMoveReg @@ -1984,7 +1992,7 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, class BF16_VCVT<string opc, bits<2> op7_6> : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm), VFPUnaryFrm, NoItinerary, - opc, ".bf16.f32\t$Sd, $Sm", []>, + opc, ".bf16.f32\t$Sd, $Sm", "", []>, RegConstraint<"$dst = $Sd">, Requires<[HasBF16]>, Sched<[]> { @@ -2440,7 +2448,7 @@ def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, list<dag> pattern>: - VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> { + VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> { // Instruction operand. bits<4> Rt; @@ -2525,7 +2533,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, list<dag> pattern>: - VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> { + VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> { // Instruction operand. bits<4> Rt; @@ -2598,7 +2606,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, - "vmov", ".f64\t$Dd, $imm", + "vmov", ".f64\t$Dd, $imm", "", [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; @@ -2617,7 +2625,7 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_fpUNA32, - "vmov", ".f32\t$Sd, $imm", + "vmov", ".f32\t$Sd, $imm", "", [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { bits<5> Sd; bits<8> imm; @@ -2635,7 +2643,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm), VFPMiscFrm, IIC_fpUNA16, - "vmov", ".f16\t$Sd, $imm", + "vmov", ".f16\t$Sd, $imm", "", [(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>, Requires<[HasFullFP16]> { bits<5> Sd; diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 188b5562cac9..1c44893581f9 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -624,12 +624,12 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, bool UseMovt = STI.useMovt(); - unsigned Size = TM.getPointerSize(0); + LLT PtrTy = MRI.getType(MIB->getOperand(0).getReg()); const Align Alignment(4); - auto addOpsForConstantPoolLoad = [&MF, Alignment, - Size](MachineInstrBuilder &MIB, - const GlobalValue *GV, bool IsSBREL) { + auto addOpsForConstantPoolLoad = [&MF, Alignment, PtrTy]( + MachineInstrBuilder &MIB, + const GlobalValue *GV, bool IsSBREL) { assert((MIB->getOpcode() == ARM::LDRi12 || MIB->getOpcode() == ARM::t2LDRpci) && "Unsupported instruction"); @@ -644,7 +644,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0) .addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - Size, Alignment)); + PtrTy, Alignment)); if (MIB->getOpcode() == ARM::LDRi12) MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); @@ -733,7 +733,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, // Add the offset to the SB register. 
MIB->setDesc(TII.get(Opcodes.ADDrr)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); MIB.addReg(ARM::R9) // FIXME: don't hardcode R9 .addReg(Offset) .add(predOps(ARMCC::AL)) @@ -748,7 +748,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, } else { // Load the global's address from the constant pool. MIB->setDesc(TII.get(Opcodes.ConstPoolLoad)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false); } } else if (STI.isTargetMachO()) { @@ -997,7 +997,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) { auto CPIndex = ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment); MIB->setDesc(TII.get(LoadOpcode)); - MIB->RemoveOperand(1); + MIB->removeOperand(1); MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0) .addMemOperand( MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index de88ffab1c28..52b6b6f3bcf7 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -14,6 +14,7 @@ #include "ARMCallLowering.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index ef5fc12feb54..0a38f5633ae3 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +34,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -2108,7 +2110,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; MF = &Fn; - STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + STI = &Fn.getSubtarget<ARMSubtarget>(); TL = STI->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = STI->getInstrInfo(); @@ -2199,7 +2201,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return false; TD = &Fn.getDataLayout(); - STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + STI = &Fn.getSubtarget<ARMSubtarget>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); @@ -2894,10 +2896,12 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg " << Base.virtRegIndex() << "\n"); - // Make sure that Increment has no uses before BaseAccess. + // Make sure that Increment has no uses before BaseAccess that are not PHI + // uses. 
for (MachineInstr &Use : MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) { - if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) { + if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI && + !DT->dominates(BaseAccess, &Use))) { LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n"); return false; } diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index f822672c4477..aa739db44da2 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -59,8 +59,10 @@ #include "MVETailPredUtils.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" @@ -1297,7 +1299,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) { } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { - const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget()); + const ARMSubtarget &ST = mf.getSubtarget<ARMSubtarget>(); if (!ST.hasLOB()) return false; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 308d5e7889f2..9596e88deb18 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -73,3 +73,10 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(MF.getFunction()); } + +MachineFunctionInfo * +ARMFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> + &Src2DstMBB) const { + return DestMF.cloneInfo<ARMFunctionInfo>(*this); +} diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index d8d937055d23..e906fea1a810 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. 
unsigned FPCXTSaveSize = 0; + unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -158,6 +159,11 @@ public: explicit ARMFunctionInfo(MachineFunction &MF); + MachineFunctionInfo * + clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, + const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) + const override; + bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } @@ -198,12 +204,14 @@ public: void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } + unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } + void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp index 46baf8930939..6effd84041b5 100644 --- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp +++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp @@ -459,6 +459,10 @@ bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) { if (ValidLHS && ValidRHS) return true; + // Ensure we don't add the root as the incoming accumulator. + if (R.getRoot() == I) + return false; + return R.InsertAcc(I); } case Instruction::Mul: { @@ -535,6 +539,7 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) { InsertParallelMACs(R); Changed = true; AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB); } } diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 1a7f10a13ed3..527fefbd291e 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -13,9 +13,9 @@ #include "ARMRegisterBankInfo.h" #include "ARMInstrInfo.h" // For the register classes #include "ARMSubtarget.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL @@ -129,8 +129,7 @@ static void checkValueMappings() { } // end namespace arm } // end namespace llvm -ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) - : ARMGenRegisterBankInfo() { +ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) { // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be // done only once. 
Indeed the table of all register banks diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h index b8aff65a967e..c56134aab38c 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H #define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #define GET_REGBANK_DECLARATIONS #include "ARMGenRegisterBank.inc" diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index ff4647dd46fd..d1d30e614fc9 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -15,4 +15,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo() {} +ARMRegisterInfo::ARMRegisterInfo() = default; diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp index 332acb453124..fa80b75484e1 100644 --- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp +++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp @@ -322,8 +322,8 @@ MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump( assert(ImpSPOpIdx != -1); int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); - BL->RemoveOperand(FirstOpIdxToRemove); - BL->RemoveOperand(SecondOpIdxToRemove); + BL->removeOperand(FirstOpIdxToRemove); + BL->removeOperand(SecondOpIdxToRemove); // Now copy over the implicit operands from the original IndirectCall BL->copyImplicitOps(MF, IndirectCall); MF.moveCallSiteInfo(&IndirectCall, BL); diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 12d4ad889897..379521752261 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -296,7 +296,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVolatile, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { const ARMSubtarget &Subtarget = @@ -314,6 +314,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( DAG.getZExtOrTrunc(Size, dl, MVT::i32)); } - return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, - Alignment.value(), RTLIB::MEMSET); + if (!AlwaysInline) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, + Alignment.value(), RTLIB::MEMSET); + + return SDValue(); } diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 7aa831c09248..ffa8b5049351 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -55,6 +55,7 @@ public: SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override; SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 32160b109343..79244f634ce3 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/Triple.h" #include 
"llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -52,19 +53,15 @@ UseFusedMulOps("arm-use-mulops", enum ITMode { DefaultIT, - RestrictedIT, - NoRestrictedIT + RestrictedIT }; static cl::opt<ITMode> -IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), - cl::ZeroOrMore, - cl::values(clEnumValN(DefaultIT, "arm-default-it", - "Generate IT block based on arch"), - clEnumValN(RestrictedIT, "arm-restrict-it", - "Disallow deprecated IT based on ARMv8"), - clEnumValN(NoRestrictedIT, "arm-no-restrict-it", - "Allow IT blocks based on ARMv7"))); + IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), + cl::values(clEnumValN(DefaultIT, "arm-default-it", + "Generate any type of IT block"), + clEnumValN(RestrictedIT, "arm-restrict-it", + "Disallow complex IT blocks"))); /// ForceFastISel - Use the fast-isel, even for subtargets where it is not /// currently supported (for testing only). @@ -237,21 +234,18 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops() && !hasMinSize(); + RestrictIT = false; break; case RestrictedIT: RestrictIT = true; break; - case NoRestrictedIT: - RestrictIT = false; - break; } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. const FeatureBitset &Bits = getFeatureBits(); if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) - UseNEONForSinglePrecisionFP = true; + HasNEONForFP = true; if (isRWPI()) ReserveR9 = true; @@ -399,6 +393,14 @@ bool ARMSubtarget::enableSubRegLiveness() const { return hasMVEIntegerOps(); } +bool ARMSubtarget::enableMachinePipeliner() const { + // Enable the MachinePipeliner before register allocation for subtargets + // with the use-mipipeliner feature. + return getSchedModel().hasInstrSchedModel() && useMachinePipeliner(); +} + +bool ARMSubtarget::useDFAforSMS() const { return false; } + // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { if (enableMachineScheduler()) @@ -417,8 +419,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const { return !isThumb1Only(); } -bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } - bool ARMSubtarget::useStride4VFPs() const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). 
But WatchOS uses a compact unwind @@ -491,3 +491,12 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF, return isThumb2() && MF.getFunction().hasMinSize() && ARM::GPRRegClass.contains(PhysReg); } + +bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const { + const Function &F = MF.getFunction(); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || + !F.needsUnwindTableEntry()) + return false; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF); +} diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 7cbdc014299f..460ec62d5a33 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -25,8 +25,8 @@ #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -150,6 +150,11 @@ public: }; protected: +// Bool members corresponding to the SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "ARMGenSubtargetInfo.inc" + /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily = Others; @@ -159,343 +164,22 @@ protected: /// ARMArch - ARM architecture ARMArchEnum ARMArch = ARMv4t; - /// HasV4TOps, HasV5TOps, HasV5TEOps, - /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - - /// Specify whether target support specific ARM ISA variants. - bool HasV4TOps = false; - bool HasV5TOps = false; - bool HasV5TEOps = false; - bool HasV6Ops = false; - bool HasV6MOps = false; - bool HasV6KOps = false; - bool HasV6T2Ops = false; - bool HasV7Ops = false; - bool HasV8Ops = false; - bool HasV8_1aOps = false; - bool HasV8_2aOps = false; - bool HasV8_3aOps = false; - bool HasV8_4aOps = false; - bool HasV8_5aOps = false; - bool HasV8_6aOps = false; - bool HasV8_8aOps = false; - bool HasV8_7aOps = false; - bool HasV9_0aOps = false; - bool HasV9_1aOps = false; - bool HasV9_2aOps = false; - bool HasV9_3aOps = false; - bool HasV8MBaselineOps = false; - bool HasV8MMainlineOps = false; - bool HasV8_1MMainlineOps = false; - bool HasMVEIntegerOps = false; - bool HasMVEFloatOps = false; - bool HasCDEOps = false; - - /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what - /// floating point ISAs are supported. - bool HasVFPv2 = false; - bool HasVFPv3 = false; - bool HasVFPv4 = false; - bool HasFPARMv8 = false; - bool HasNEON = false; - bool HasFPRegs = false; - bool HasFPRegs16 = false; - bool HasFPRegs64 = false; - - /// Versions of the VFP flags restricted to single precision, or to - /// 16 d-registers, or both. - bool HasVFPv2SP = false; - bool HasVFPv3SP = false; - bool HasVFPv4SP = false; - bool HasFPARMv8SP = false; - bool HasVFPv3D16 = false; - bool HasVFPv4D16 = false; - bool HasFPARMv8D16 = false; - bool HasVFPv3D16SP = false; - bool HasVFPv4D16SP = false; - bool HasFPARMv8D16SP = false; - - /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. - bool HasDotProd = false; - - /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been - /// specified. 
Use the method useNEONForSinglePrecisionFP() to - /// determine if NEON should actually be used. - bool UseNEONForSinglePrecisionFP = false; - /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. bool UseMulOps = false; - /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates - /// whether the FP VML[AS] instructions are slow (if so, don't use them). - bool SlowFPVMLx = false; - - /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates - /// whether the FP VFM[AS] instructions are slow (if so, don't use them). - bool SlowFPVFMx = false; - - /// HasVMLxForwarding - If true, NEON has special multiplier accumulator - /// forwarding to allow mul + mla being issued back to back. - bool HasVMLxForwarding = false; - - /// SlowFPBrcc - True if floating point compare + branch is slow. - bool SlowFPBrcc = false; - - /// InThumbMode - True if compiling for Thumb, false for ARM. - bool InThumbMode = false; - - /// UseSoftFloat - True if we're using software floating point features. - bool UseSoftFloat = false; - - /// UseMISched - True if MachineScheduler should be used for this subtarget. - bool UseMISched = false; - - /// DisablePostRAScheduler - False if scheduling should happen again after - /// register allocation. - bool DisablePostRAScheduler = false; - - /// HasThumb2 - True if Thumb2 instructions are supported. - bool HasThumb2 = false; - - /// NoARM - True if subtarget does not support ARM mode execution. - bool NoARM = false; - - /// ReserveR9 - True if R9 is not available as a general purpose register. - bool ReserveR9 = false; - - /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of - /// 32-bit imms (including global addresses). - bool NoMovt = false; - /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall = false; - /// HasFP16 - True if subtarget supports half-precision FP conversions - bool HasFP16 = false; - - /// HasFullFP16 - True if subtarget supports half-precision FP operations - bool HasFullFP16 = false; - - /// HasFP16FML - True if subtarget supports half-precision FP fml operations - bool HasFP16FML = false; - - /// HasBF16 - True if subtarget supports BFloat16 floating point operations - bool HasBF16 = false; - - /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply - bool HasMatMulInt8 = false; - - /// HasD32 - True if subtarget has the full 32 double precision - /// FP registers for VFPv3. - bool HasD32 = false; - - /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode - bool HasHardwareDivideInThumb = false; - - /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode - bool HasHardwareDivideInARM = false; - - /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier - /// instructions. - bool HasDataBarrier = false; - - /// HasFullDataBarrier - True if the subtarget supports DFB data barrier - /// instruction. - bool HasFullDataBarrier = false; - - /// HasV7Clrex - True if the subtarget supports CLREX instructions - bool HasV7Clrex = false; - - /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc) - /// instructions - bool HasAcquireRelease = false; - - /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions - /// over 16-bit ones. 
- bool Pref32BitThumb = false; - - /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions - /// that partially update CPSR and add false dependency on the previous - /// CPSR setting instruction. - bool AvoidCPSRPartialUpdate = false; - - /// CheapPredicableCPSRDef - If true, disable +1 predication cost - /// for instructions updating CPSR. Enabled for Cortex-A57. - bool CheapPredicableCPSRDef = false; - - /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting - /// movs with shifter operand (i.e. asr, lsl, lsr). - bool AvoidMOVsShifterOperand = false; - - /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should - /// avoid issue "normal" call instructions to callees which do not return. - bool HasRetAddrStack = false; - - /// HasBranchPredictor - True if the subtarget has a branch predictor. Having - /// a branch predictor or not changes the expected cost of taking a branch - /// which affects the choice of whether to use predicated instructions. - bool HasBranchPredictor = true; - - /// HasMPExtension - True if the subtarget supports Multiprocessing - /// extension (ARMv7 only). - bool HasMPExtension = false; - - /// HasVirtualization - True if the subtarget supports the Virtualization - /// extension. - bool HasVirtualization = false; - - /// HasFP64 - If true, the floating point unit supports double - /// precision. - bool HasFP64 = false; - - /// If true, the processor supports the Performance Monitor Extensions. These - /// include a generic cycle-counter as well as more fine-grained (often - /// implementation-specific) events. - bool HasPerfMon = false; - - /// HasTrustZone - if true, processor supports TrustZone security extensions - bool HasTrustZone = false; - - /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions - bool Has8MSecExt = false; - - /// HasSHA2 - if true, processor supports SHA1 and SHA256 - bool HasSHA2 = false; - - /// HasAES - if true, processor supports AES - bool HasAES = false; - - /// HasCrypto - if true, processor supports Cryptography extensions - bool HasCrypto = false; - - /// HasCRC - if true, processor supports CRC instructions - bool HasCRC = false; - - /// HasRAS - if true, the processor supports RAS extensions - bool HasRAS = false; - - /// HasLOB - if true, the processor supports the Low Overhead Branch extension - bool HasLOB = false; - - bool HasPACBTI = false; - - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are - /// particularly effective at zeroing a VFP register. - bool HasZeroCycleZeroing = false; - - /// HasFPAO - if true, processor does positive address offset computation faster - bool HasFPAO = false; - - /// HasFuseAES - if true, processor executes back to back AES instruction - /// pairs faster. - bool HasFuseAES = false; - - /// HasFuseLiterals - if true, processor executes back to back - /// bottom and top halves of literal generation faster. - bool HasFuseLiterals = false; - - /// If true, if conversion may decide to leave some instructions unpredicated. - bool IsProfitableToUnpredicate = false; - - /// If true, VMOV will be favored over VGETLNi32. - bool HasSlowVGETLNi32 = false; - - /// If true, VMOV will be favored over VDUP. - bool HasSlowVDUP32 = false; - - /// If true, VMOVSR will be favored over VMOVDRR. - bool PreferVMOVSR = false; - - /// If true, ISHST barriers will be used for Release semantics. 
- bool PreferISHST = false; - - /// If true, a VLDM/VSTM starting with an odd register number is considered to - /// take more microops than single VLDRS/VSTRS. - bool SlowOddRegister = false; - - /// If true, loading into a D subregister will be penalized. - bool SlowLoadDSubregister = false; - - /// If true, use a wider stride when allocating VFP registers. - bool UseWideStrideVFP = false; - - /// If true, the AGU and NEON/FPU units are multiplexed. - bool HasMuxedUnits = false; - - /// If true, VMOVS will never be widened to VMOVD. - bool DontWidenVMOVS = false; - - /// If true, splat a register between VFP and NEON instructions. - bool SplatVFPToNeon = false; - - /// If true, run the MLx expansion pass. - bool ExpandMLx = false; - - /// If true, VFP/NEON VMLA/VMLS have special RAW hazards. - bool HasVMLxHazards = false; - - // If true, read thread pointer from coprocessor register. - bool ReadTPHard = false; - - /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. - bool UseNEONForFPMovs = false; - - /// If true, VLDn instructions take an extra cycle for unaligned accesses. - bool CheckVLDnAlign = false; - - /// If true, VFP instructions are not pipelined. - bool NonpipelinedVFP = false; - - /// StrictAlign - If true, the subtarget disallows unaligned memory - /// accesses for some types. For details, see - /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool StrictAlign = false; - - /// RestrictIT - If true, the subtarget disallows generation of deprecated IT - /// blocks to conform to ARMv8 rule. + /// RestrictIT - If true, the subtarget disallows generation of complex IT + /// blocks. bool RestrictIT = false; - /// HasDSP - If true, the subtarget supports the DSP (saturating arith - /// and such) instructions. - bool HasDSP = false; - - /// NaCl TRAP instruction is generated instead of the regular TRAP. - bool UseNaClTrap = false; - - /// Generate calls via indirect call instructions. - bool GenLongCalls = false; - - /// Generate code that does not contain data access to code sections. - bool GenExecuteOnly = false; - - /// Target machine allowed unsafe FP math (such as use of NEON fp) - bool UnsafeFPMath = false; - /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). bool UseSjLjEH = false; - /// Has speculation barrier - bool HasSB = false; - - /// Implicitly convert an instruction to a different one if its immediates - /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. - bool NegativeImmediates = true; - - /// Mitigate against the cve-2021-35465 security vulnurability. - bool FixCMSE_CVE_2021_35465 = false; - - /// Harden against Straight Line Speculation for Returns and Indirect - /// Branches. - bool HardenSlsRetBr = false; - - /// Harden against Straight Line Speculation for indirect calls. - bool HardenSlsBlr = false; - - /// Generate thunk code for SLS mitigation in the normal text section. - bool HardenSlsNoComdat = false; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); @@ -540,10 +224,6 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) 
InstrItineraryData InstrItins; - /// NoBTIAtReturnTwice - Don't place a BTI instruction after - /// return-twice constructs (setjmp) - bool NoBTIAtReturnTwice = false; - /// Options passed via command line that could influence the target const TargetOptions &Options; @@ -622,38 +302,12 @@ private: std::bitset<8> CoprocCDE = {}; public: - void computeIssueWidth(); +// Getters for SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "ARMGenSubtargetInfo.inc" - bool hasV4TOps() const { return HasV4TOps; } - bool hasV5TOps() const { return HasV5TOps; } - bool hasV5TEOps() const { return HasV5TEOps; } - bool hasV6Ops() const { return HasV6Ops; } - bool hasV6MOps() const { return HasV6MOps; } - bool hasV6KOps() const { return HasV6KOps; } - bool hasV6T2Ops() const { return HasV6T2Ops; } - bool hasV7Ops() const { return HasV7Ops; } - bool hasV8Ops() const { return HasV8Ops; } - bool hasV8_1aOps() const { return HasV8_1aOps; } - bool hasV8_2aOps() const { return HasV8_2aOps; } - bool hasV8_3aOps() const { return HasV8_3aOps; } - bool hasV8_4aOps() const { return HasV8_4aOps; } - bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV8_6aOps() const { return HasV8_6aOps; } - bool hasV8_7aOps() const { return HasV8_7aOps; } - bool hasV8_8aOps() const { return HasV8_8aOps; } - bool hasV9_0aOps() const { return HasV9_0aOps; } - bool hasV9_1aOps() const { return HasV9_1aOps; } - bool hasV9_2aOps() const { return HasV9_2aOps; } - bool hasV9_3aOps() const { return HasV9_3aOps; } - bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } - bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } - bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } - bool hasMVEIntegerOps() const { return HasMVEIntegerOps; } - bool hasMVEFloatOps() const { return HasMVEFloatOps; } - bool hasCDEOps() const { return HasCDEOps; } - bool hasFPRegs() const { return HasFPRegs; } - bool hasFPRegs16() const { return HasFPRegs16; } - bool hasFPRegs64() const { return HasFPRegs64; } + void computeIssueWidth(); /// @{ /// These functions are obsolete, please consider adding subtarget features @@ -673,31 +327,14 @@ public: bool hasARMOps() const { return !NoARM; } - bool hasVFP2Base() const { return HasVFPv2SP; } - bool hasVFP3Base() const { return HasVFPv3D16SP; } - bool hasVFP4Base() const { return HasVFPv4D16SP; } - bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } - bool hasNEON() const { return HasNEON; } - bool hasSHA2() const { return HasSHA2; } - bool hasAES() const { return HasAES; } - bool hasCrypto() const { return HasCrypto; } - bool hasDotProd() const { return HasDotProd; } - bool hasCRC() const { return HasCRC; } - bool hasRAS() const { return HasRAS; } - bool hasLOB() const { return HasLOB; } - bool hasPACBTI() const { return HasPACBTI; } - bool hasVirtualization() const { return HasVirtualization; } - bool useNEONForSinglePrecisionFP() const { - return hasNEON() && UseNEONForSinglePrecisionFP; + return hasNEON() && hasNEONForFP(); } - bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } - bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } - bool hasDataBarrier() const { return HasDataBarrier; } - bool hasFullDataBarrier() const { return HasFullDataBarrier; } - bool hasV7Clrex() const { return HasV7Clrex; } - bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasVFP2Base() const { return hasVFPv2SP(); } + bool hasVFP3Base() const { 
return hasVFPv3D16SP(); } + bool hasVFP4Base() const { return hasVFPv4D16SP(); } + bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); @@ -710,43 +347,7 @@ public: } bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); } bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); } - bool hasVMLxForwarding() const { return HasVMLxForwarding; } - bool isFPBrccSlow() const { return SlowFPBrcc; } - bool hasFP64() const { return HasFP64; } - bool hasPerfMon() const { return HasPerfMon; } - bool hasTrustZone() const { return HasTrustZone; } - bool has8MSecExt() const { return Has8MSecExt; } - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - bool hasFPAO() const { return HasFPAO; } - bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } - bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } - bool hasSlowVDUP32() const { return HasSlowVDUP32; } - bool preferVMOVSR() const { return PreferVMOVSR; } - bool preferISHSTBarriers() const { return PreferISHST; } - bool expandMLx() const { return ExpandMLx; } - bool hasVMLxHazards() const { return HasVMLxHazards; } - bool hasSlowOddRegister() const { return SlowOddRegister; } - bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } - bool useWideStrideVFP() const { return UseWideStrideVFP; } - bool hasMuxedUnits() const { return HasMuxedUnits; } - bool dontWidenVMOVS() const { return DontWidenVMOVS; } - bool useSplatVFPToNeon() const { return SplatVFPToNeon; } - bool useNEONForFPMovs() const { return UseNEONForFPMovs; } - bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } - bool nonpipelinedVFP() const { return NonpipelinedVFP; } - bool prefers32BitThumb() const { return Pref32BitThumb; } - bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } - bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } - bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } - bool hasRetAddrStack() const { return HasRetAddrStack; } - bool hasBranchPredictor() const { return HasBranchPredictor; } - bool hasMPExtension() const { return HasMPExtension; } - bool hasDSP() const { return HasDSP; } - bool useNaClTrap() const { return UseNaClTrap; } bool useSjLjEH() const { return UseSjLjEH; } - bool hasSB() const { return HasSB; } - bool genLongCalls() const { return GenLongCalls; } - bool genExecuteOnly() const { return GenExecuteOnly; } bool hasBaseDSP() const { if (isThumb()) return hasDSP(); @@ -754,25 +355,16 @@ public: return hasV5TEOps(); } - bool hasFP16() const { return HasFP16; } - bool hasD32() const { return HasD32; } - bool hasFullFP16() const { return HasFullFP16; } - bool hasFP16FML() const { return HasFP16FML; } - bool hasBF16() const { return HasBF16; } - - bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); } - bool hasMatMulInt8() const { return HasMatMulInt8; } - const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } + bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } @@ -825,24 +417,21 @@ public: bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } - bool disablePostRAScheduler() const { return DisablePostRAScheduler; } - bool useSoftFloat() const { return UseSoftFloat; } - bool isThumb() const { return InThumbMode; } + bool useMachinePipeliner() const { return UseMIPipeliner; } bool hasMinSize() const { return OptMinSize; } - bool isThumb1Only() const { return InThumbMode && !HasThumb2; } - bool isThumb2() const { return InThumbMode && HasThumb2; } - bool hasThumb2() const { return HasThumb2; } + bool isThumb1Only() const { return isThumb() && !hasThumb2(); } + bool isThumb2() const { return isThumb() && hasThumb2(); } bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } - bool isReadTPHard() const { return ReadTPHard; } bool isR9Reserved() const { return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || (!isTargetWindows() && isThumb())) + if (isTargetDarwin() || + (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } @@ -859,6 +448,8 @@ public: isThumb1Only(); } + bool splitFramePointerPush(const MachineFunction &MF) const; + bool useStride4VFPs() const; bool useMovt() const; @@ -878,6 +469,10 @@ public: /// Returns true if machine scheduler should be enabled. bool enableMachineScheduler() const override; + /// Returns true if machine pipeliner should be enabled. + bool enableMachinePipeliner() const override; + bool useDFAforSMS() const override; + /// True for some subtargets at > -O0. bool enablePostRAScheduler() const override; @@ -891,9 +486,6 @@ public: /// scheduling, DAGCombine, etc.). bool useAA() const override { return true; } - // enableAtomicExpand- True if we need to expand our atomics. - bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. 
  const InstrItineraryData *getInstrItineraryData() const override {
@@ -956,14 +548,6 @@ public:
  bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                   unsigned PhysReg) const override;
  unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
-
- bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; }
-
- bool hardenSlsRetBr() const { return HardenSlsRetBr; }
- bool hardenSlsBlr() const { return HardenSlsBlr; }
- bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
-
- bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index c38970f8e341..d95c21d6504b 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -30,20 +31,20 @@
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
@@ -106,6 +107,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
  initializeMVEGatherScatterLoweringPass(Registry);
  initializeARMSLSHardeningPass(Registry);
  initializeMVELaneInterleavingPass(Registry);
+ initializeARMFixCortexA57AES1742098Pass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -194,7 +196,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                           Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
+ if (!RM)
    // Default relocation model on Darwin is PIC.
    return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
@@ -307,7 +309,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
+ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(ARMTTIImpl(this, F));
}
@@ -434,6 +436,9 @@ void ARMPassConfig::addIRPasses() {
  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());
+
+ if (TM->Options.JMCInstrument)
+   addPass(createJMCInstrumenterPass());
}
void ARMPassConfig::addCodeGenPrepare() {
@@ -505,6 +510,9 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
+   if (getOptLevel() == CodeGenOpt::Aggressive)
+     addPass(&MachinePipelinerID);
+
    addPass(createMVETPAndVPTOptimisationsPass());
    addPass(createMLxExpansionPass());
@@ -573,8 +581,20 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ // Inserts fixup instructions before unsafe AES operations. Instructions may
+ // be inserted at the start of blocks and within blocks, so this pass has to
+ // come before those below.
+ addPass(createARMFixCortexA57AES1742098Pass());
+ // Inserts BTIs at the start of functions and indirectly-called basic blocks,
+ // so passes cannot add to the start of basic blocks once this has run.
  addPass(createARMBranchTargetsPass());
+ // Inserts Constant Islands. Block sizes cannot be increased after this point,
+ // as this may push the branch ranges and load offsets of accessing constant
+ // pools out of range.
  addPass(createARMConstantIslandPass());
+ // Finalises Low-Overhead Loops. This replaces pseudo instructions with real
+ // instructions, but the pseudos all have conservative sizes so that block
+ // sizes will only be decreased by this pass.
  addPass(createARMLowOverheadLoopsPass());
  if (TM->getTargetTriple().isOSWindows()) {
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8428092bf179..8d33a038deeb 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -52,7 +52,7 @@ public:
  const ARMSubtarget *getSubtargetImpl() const = delete;
  bool isLittleEndian() const { return isLittle; }
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
  // Pass Pipeline Configuration
  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d9d563ead260..3a9946ee810b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1202,7 +1202,8 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                           VectorType *Tp, ArrayRef<int> Mask,
-                                          int Index, VectorType *SubTp) {
+                                          int Index, VectorType *SubTp,
+                                          ArrayRef<const Value *> Args) {
  Kind = improveShuffleKindFromMask(Kind, Mask);
  if (ST->hasNEON()) {
    if (Kind == TTI::SK_Broadcast) {
@@ -1290,7 +1291,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
  if (!Mask.empty()) {
    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-   if (Mask.size() <= LT.second.getVectorNumElements() &&
+   if (LT.second.isVector() &&
+       Mask.size() <= LT.second.getVectorNumElements() &&
        (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
         isVREVMask(Mask, LT.second, 64)))
      return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
@@ -1764,6 +1766,48 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
  }
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat: {
+   if (ICA.getArgTypes().empty())
+     break;
+   bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+   auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+   EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
+   // Check for the legal types, with the correct subtarget features.
+   if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
+       (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
+       (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
+     return LT.first;
+
+   // Equally for MVE vector types
+   if (ST->hasMVEFloatOps() &&
+       (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
+       LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())
+     return LT.first * ST->getMVEVectorCostFactor(CostKind);
+
+   // Otherwise we use a legal convert followed by a min+max
+   if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
+        (ST->hasFP64() && LT.second == MVT::f64) ||
+        (ST->hasFullFP16() && LT.second == MVT::f16) ||
+        (ST->hasMVEFloatOps() &&
+         (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
+       LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
+     Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(),
+                                     LT.second.getScalarSizeInBits());
+     InstructionCost Cost =
+         LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
+     IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
+                                             : Intrinsic::umin,
+                                    LegalTy, {LegalTy, LegalTy});
+     Cost += getIntrinsicInstrCost(Attrs1, CostKind);
+     IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
+                                             : Intrinsic::umax,
+                                    LegalTy, {LegalTy, LegalTy});
+     Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+     return LT.first * Cost;
+   }
+   break;
+ }
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@@ -1771,7 +1815,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
  if (!F->isIntrinsic())
-   BaseT::isLoweredToCall(F);
+   return BaseT::isLoweredToCall(F);
  // Assume all Arm-specific intrinsics map to an instruction.
  if (F->getName().startswith("llvm.arm"))
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 5bb84899e5ef..d7a2bdb3db15 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -213,7 +213,8 @@ public:
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
-                                VectorType *SubTp);
+                                VectorType *SubTp,
+                                ArrayRef<const Value *> Args = None);
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c7734cc2cf11..b725ea3a84e5 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -453,6 +453,7 @@ class ARMAsmParser : public MCTargetAsmParser {
                         bool AllowRAAC = false);
  bool parseMemory(OperandVector &);
  bool parseOperand(OperandVector &, StringRef Mnemonic);
+ bool parseImmExpr(int64_t &Out);
  bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
  bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
                              unsigned &ShiftAmount);
@@ -488,6 +489,17 @@ class ARMAsmParser : public MCTargetAsmParser {
  bool parseDirectiveAlign(SMLoc L);
  bool parseDirectiveThumbSet(SMLoc L);
+ bool parseDirectiveSEHAllocStack(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveRegs(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveSP(SMLoc L);
+ bool parseDirectiveSEHSaveFRegs(SMLoc L);
+ bool parseDirectiveSEHSaveLR(SMLoc L);
+ bool parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment);
+ bool parseDirectiveSEHNop(SMLoc L, bool Wide);
+ bool parseDirectiveSEHEpilogStart(SMLoc L, bool Condition);
+ bool parseDirectiveSEHEpilogEnd(SMLoc L);
+ bool parseDirectiveSEHCustom(SMLoc L);
+
  bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken);
  StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken,
                          unsigned &PredicationCode,
@@ -4528,9 +4540,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
      if (Reg == EndReg)
        continue;
      // The register must be in the same register class as the first.
-     if ((Reg == ARM::RA_AUTH_CODE &&
-          RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
-         (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg)))
+     if (!RC->contains(Reg))
        return Error(AfterMinusLoc, "invalid register in register list");
      // Ranges must go from low to high.
      if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
@@ -6319,6 +6329,18 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  }
}
+bool ARMAsmParser::parseImmExpr(int64_t &Out) {
+  const MCExpr *Expr = nullptr;
+  SMLoc L = getParser().getTok().getLoc();
+  if (check(getParser().parseExpression(Expr), L, "expected expression"))
+    return true;
+  const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+  if (check(!Value, L, "expected constant expression"))
+    return true;
+  Out = Value->getValue();
+  return false;
+}
+
// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
//  :lower16: and :upper16:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
@@ -6379,7 +6401,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
    CurrentFormat = WASM;
    break;
  case MCContext::IsGOFF:
+ case MCContext::IsSPIRV:
  case MCContext::IsXCOFF:
+ case MCContext::IsDXContainer:
    llvm_unreachable("unexpected object format");
    break;
  }
@@ -10958,9 +10982,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
      return true;
    }
- { // processInstruction() updates inITBlock state, we need to save it away
-   bool wasInITBlock = inITBlock();
-
+ {
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other. E.g.,
@@ -10969,12 +10991,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
    LLVM_DEBUG(dbgs() << "Changed to: ";
               Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
               dbgs() << "\n");
-
-   // Only after the instruction is fully processed, we can validate it
-   if (wasInITBlock && hasV8Ops() && isThumb() &&
-       !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) {
-     Warning(IDLoc, "deprecated instruction in IT block");
-   }
  }
  // Only move forward at the very end so that everything in validate
@@ -11090,6 +11106,39 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
      parseDirectiveTLSDescSeq(DirectiveID.getLoc());
    else
      return true;
+ } else if (IsCOFF) {
+   if (IDVal == ".seh_stackalloc")
+     parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/false);
+   else if (IDVal == ".seh_stackalloc_w")
+     parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/true);
+   else if (IDVal == ".seh_save_regs")
+     parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/false);
+   else if (IDVal == ".seh_save_regs_w")
+     parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/true);
+   else if (IDVal == ".seh_save_sp")
+     parseDirectiveSEHSaveSP(DirectiveID.getLoc());
+   else if (IDVal == ".seh_save_fregs")
+     parseDirectiveSEHSaveFRegs(DirectiveID.getLoc());
+   else if (IDVal == ".seh_save_lr")
+     parseDirectiveSEHSaveLR(DirectiveID.getLoc());
+   else if (IDVal == ".seh_endprologue")
+     parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/false);
+   else if (IDVal == ".seh_endprologue_fragment")
+     parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/true);
+   else if (IDVal == ".seh_nop")
+     parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/false);
+   else if (IDVal == ".seh_nop_w")
+     parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/true);
+   else if (IDVal == ".seh_startepilogue")
+     parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/false);
+   else if (IDVal == ".seh_startepilogue_cond")
+     parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/true);
+   else if (IDVal == ".seh_endepilogue")
+     parseDirectiveSEHEpilogEnd(DirectiveID.getLoc());
+   else if (IDVal == ".seh_custom")
+     parseDirectiveSEHCustom(DirectiveID.getLoc());
+   else
+     return true;
  } else
    return true;
  return false;
@@ -11113,8 +11162,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
/// parseDirectiveThumb
///  ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
-     check(!hasThumb(), L, "target does not support Thumb mode"))
+ if (parseEOL() || check(!hasThumb(), L, "target does not support Thumb mode"))
    return true;
  if (!isThumb())
@@ -11127,8 +11175,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
///  ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
-     check(!hasARM(), L, "target does not support ARM mode"))
+ if (parseEOL() || check(!hasARM(), L, "target does not support ARM mode"))
    return true;
  if (isThumb())
@@ -11167,15 +11214,13 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
                                 Parser.getTok().getIdentifier());
      getParser().getStreamer().emitThumbFunc(Func);
      Parser.Lex();
-     if (parseToken(AsmToken::EndOfStatement,
-                    "unexpected token in '.thumb_func' directive"))
+     if (parseEOL())
        return true;
      return false;
    }
  }
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.thumb_func' directive"))
+ if (parseEOL())
    return true;
  // .thumb_func implies .thumb
@@ -11204,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
            "'.syntax divided' arm assembly not supported") ||
      check(Mode != "unified" && Mode != "UNIFIED", L,
            "unrecognized syntax mode in .syntax directive") ||
-     parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+     parseEOL())
    return true;
  // TODO tell the MC streamer the mode
@@ -11226,7 +11271,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
  }
  Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
    return true;
  if (Val == 16) {
@@ -11257,8 +11302,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
  SMLoc SRegLoc, ERegLoc;
  if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc,
            "register name expected") ||
-     parseToken(AsmToken::EndOfStatement,
-                "unexpected input in .req directive."))
+     parseEOL())
    return true;
  if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg)
@@ -11276,10 +11320,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
    return Error(L, "unexpected input in .unreq directive.");
  RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
  Parser.Lex(); // Eat the identifier.
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected input in '.unreq' directive"))
-   return true;
- return false;
+ return parseEOL();
}
// After changing arch/CPU, try to put the ARM/Thumb mode back to what it was
@@ -11340,11 +11381,11 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
    StringRef Name = Parser.getTok().getIdentifier();
    Optional<unsigned> Ret = ELFAttrs::attrTypeFromString(
        Name, ARMBuildAttrs::getARMAttributeTags());
-   if (!Ret.hasValue()) {
+   if (!Ret) {
      Error(TagLoc, "attribute name not recognised: " + Name);
      return false;
    }
-   Tag = Ret.getValue();
+   Tag = *Ret;
    Parser.Lex();
  } else {
    const MCExpr *AttrExpr;
@@ -11406,8 +11447,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
    Parser.Lex();
  }
- if (Parser.parseToken(AsmToken::EndOfStatement,
-                       "unexpected token in '.eabi_attribute' directive"))
+ if (Parser.parseEOL())
    return true;
  if (IsIntegerValue && IsStringValue) {
@@ -11463,8 +11503,7 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
///  ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.fnstart' directive"))
+ if (parseEOL())
    return true;
  if (UC.hasFnStart()) {
@@ -11485,8 +11524,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
/// parseDirectiveFnEnd
///  ::= .fnend
bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.fnend' directive"))
+ if (parseEOL())
    return true;
  // Check the ordering of unwind directives
  if (!UC.hasFnStart())
@@ -11502,8 +11540,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
/// parseDirectiveCantUnwind
///  ::= .cantunwind
bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.cantunwind' directive"))
+ if (parseEOL())
    return true;
  UC.recordCantUnwind(L);
@@ -11538,8 +11575,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
  StringRef Name(Parser.getTok().getIdentifier());
  Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.personality' directive"))
+ if (parseEOL())
    return true;
  UC.recordPersonality(L);
@@ -11571,8 +11607,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
/// parseDirectiveHandlerData
///  ::= .handlerdata
bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.handlerdata' directive"))
+ if (parseEOL())
    return true;
  UC.recordHandlerData(L);
@@ -11670,8 +11705,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
  if (!CE)
    return Error(ExLoc, "pad offset must be an immediate");
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.pad' directive"))
+ if (parseEOL())
    return true;
  getTargetStreamer().emitPad(CE->getValue());
@@ -11692,8 +11726,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
  // Parse the register list
- if (parseRegisterList(Operands, true, true) ||
-     parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseRegisterList(Operands, true, true) || parseEOL())
    return true;
  ARMOperand &Op = (ARMOperand &)*Operands[0];
  if (!IsVector && !Op.isRegList())
@@ -11776,7 +11809,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
/// parseDirectiveLtorg
///  ::= .ltorg | .pool
bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
    return true;
  getTargetStreamer().emitCurrentConstantPool();
  return false;
@@ -11785,7 +11818,7 @@ bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
  const MCSection *Section = getStreamer().getCurrentSectionOnly();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
    return true;
  if (!Section) {
@@ -11794,7 +11827,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
  }
  assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(2, &getSTI());
  else
    getStreamer().emitValueToAlignment(2);
@@ -11810,9 +11843,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
  const MCExpr *IndexExpression;
  SMLoc IndexLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(IndexExpression) ||
-     parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.personalityindex' directive")) {
+ if (Parser.parseExpression(IndexExpression) || parseEOL()) {
    return true;
  }
@@ -11913,11 +11944,10 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
                                MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
  Lex();
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.tlsdescseq' directive"))
+ if (parseEOL())
    return true;
- getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
+ getTargetStreamer().annotateTLSDescriptorSequence(SRE);
  return false;
}
@@ -11955,8 +11985,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
    Offset = CE->getValue();
  }
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.movsp' directive"))
+ if (parseEOL())
    return true;
  getTargetStreamer().emitMovSP(SPReg, Offset);
@@ -11996,7 +12025,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
  // '.align' is target specifically handled to mean 2**2 byte alignment.
  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(4, &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(4, 0, 1, 0);
@@ -12026,6 +12055,175 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
  return false;
}
+/// parseDirectiveSEHAllocStack
+/// ::= .seh_stackalloc
+/// ::= .seh_stackalloc_w
+bool ARMAsmParser::parseDirectiveSEHAllocStack(SMLoc L, bool Wide) {
+  int64_t Size;
+  if (parseImmExpr(Size))
+    return true;
+  getTargetStreamer().emitARMWinCFIAllocStack(Size, Wide);
+  return false;
+}
+
+/// parseDirectiveSEHSaveRegs
+/// ::= .seh_save_regs
+/// ::= .seh_save_regs_w
+bool ARMAsmParser::parseDirectiveSEHSaveRegs(SMLoc L, bool Wide) {
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+  if (parseRegisterList(Operands) || parseEOL())
+    return true;
+  ARMOperand &Op = (ARMOperand &)*Operands[0];
+  if (!Op.isRegList())
+    return Error(L, ".seh_save_regs{_w} expects GPR registers");
+  const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+  uint32_t Mask = 0;
+  for (size_t i = 0; i < RegList.size(); ++i) {
+    unsigned Reg = MRI->getEncodingValue(RegList[i]);
+    if (Reg == 15) // pc -> lr
+      Reg = 14;
+    if (Reg == 13)
+      return Error(L, ".seh_save_regs{_w} can't include SP");
+    assert(Reg < 16U && "Register out of range");
+    unsigned Bit = (1u << Reg);
+    Mask |= Bit;
+  }
+  if (!Wide && (Mask & 0x1f00) != 0)
+    return Error(L,
+                 ".seh_save_regs cannot save R8-R12, needs .seh_save_regs_w");
+  getTargetStreamer().emitARMWinCFISaveRegMask(Mask, Wide);
+  return false;
+}
+
+/// parseDirectiveSEHSaveSP
+/// ::= .seh_save_sp
+bool ARMAsmParser::parseDirectiveSEHSaveSP(SMLoc L) {
+  int Reg = tryParseRegister();
+  if (Reg == -1 || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg))
+    return Error(L, "expected GPR");
+  unsigned Index = MRI->getEncodingValue(Reg);
+  if (Index > 14 || Index == 13)
+    return Error(L, "invalid register for .seh_save_sp");
+  getTargetStreamer().emitARMWinCFISaveSP(Index);
+  return false;
+}
+
+/// parseDirectiveSEHSaveFRegs
+/// ::= .seh_save_fregs
+bool ARMAsmParser::parseDirectiveSEHSaveFRegs(SMLoc L) {
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+  if (parseRegisterList(Operands) || parseEOL())
+    return true;
+  ARMOperand &Op = (ARMOperand &)*Operands[0];
+  if (!Op.isDPRRegList())
+    return Error(L, ".seh_save_fregs expects DPR registers");
+  const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+  uint32_t Mask = 0;
+  for (size_t i = 0; i < RegList.size(); ++i) {
+    unsigned Reg = MRI->getEncodingValue(RegList[i]);
+    assert(Reg < 32U && "Register out of range");
+    unsigned Bit = (1u << Reg);
+    Mask |= Bit;
+  }
+
+  if (Mask == 0)
+    return Error(L, ".seh_save_fregs missing registers");
+
+  unsigned First = 0;
+  while ((Mask & 1) == 0) {
+    First++;
+    Mask >>= 1;
+  }
+  if (((Mask + 1) & Mask) != 0)
+    return Error(L,
+                 ".seh_save_fregs must take a contiguous range of registers");
+  unsigned Last = First;
+  while ((Mask & 2) != 0) {
+    Last++;
+    Mask >>= 1;
+  }
+  if (First < 16 && Last >= 16)
+    return Error(L, ".seh_save_fregs must be all d0-d15 or d16-d31");
+  getTargetStreamer().emitARMWinCFISaveFRegs(First, Last);
+  return false;
+}
+
+/// parseDirectiveSEHSaveLR
+/// ::= .seh_save_lr
+bool ARMAsmParser::parseDirectiveSEHSaveLR(SMLoc L) {
+  int64_t Offset;
+  if (parseImmExpr(Offset))
+    return true;
+  getTargetStreamer().emitARMWinCFISaveLR(Offset);
+  return false;
+}
+
+/// parseDirectiveSEHPrologEnd
+/// ::= .seh_endprologue
+/// ::= .seh_endprologue_fragment
+bool ARMAsmParser::parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment) {
+  getTargetStreamer().emitARMWinCFIPrologEnd(Fragment);
+  return false;
+}
+
+/// parseDirectiveSEHNop
+/// ::= .seh_nop
+/// ::= .seh_nop_w
+bool ARMAsmParser::parseDirectiveSEHNop(SMLoc L, bool Wide) {
+  getTargetStreamer().emitARMWinCFINop(Wide);
+  return false;
+}
+
+/// parseDirectiveSEHEpilogStart
+/// ::= .seh_startepilogue
+/// ::= .seh_startepilogue_cond
+bool ARMAsmParser::parseDirectiveSEHEpilogStart(SMLoc L, bool Condition) {
+  unsigned CC = ARMCC::AL;
+  if (Condition) {
+    MCAsmParser &Parser = getParser();
+    SMLoc S = Parser.getTok().getLoc();
+    const AsmToken &Tok = Parser.getTok();
+    if (!Tok.is(AsmToken::Identifier))
+      return Error(S, ".seh_startepilogue_cond missing condition");
+    CC = ARMCondCodeFromString(Tok.getString());
+    if (CC == ~0U)
+      return Error(S, "invalid condition");
+    Parser.Lex(); // Eat the token.
+  }
+
+  getTargetStreamer().emitARMWinCFIEpilogStart(CC);
+  return false;
+}
+
+/// parseDirectiveSEHEpilogEnd
+/// ::= .seh_endepilogue
+bool ARMAsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
+  getTargetStreamer().emitARMWinCFIEpilogEnd();
+  return false;
+}
+
+/// parseDirectiveSEHCustom
+/// ::= .seh_custom
+bool ARMAsmParser::parseDirectiveSEHCustom(SMLoc L) {
+  unsigned Opcode = 0;
+  do {
+    int64_t Byte;
+    if (parseImmExpr(Byte))
+      return true;
+    if (Byte > 0xff || Byte < 0)
+      return Error(L, "Invalid byte value in .seh_custom");
+    if (Opcode > 0x00ffffff)
+      return Error(L, "Too many bytes in .seh_custom");
+    // Store the bytes as one big-endian number in Opcode. In a multi-byte
+    // opcode sequence, the first byte can't be zero.
+    Opcode = (Opcode << 8) | Byte;
+  } while (parseOptionalToken(AsmToken::Comma));
+  getTargetStreamer().emitARMWinCFICustom(Opcode);
+  return false;
+}
+
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmParser() {
  RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
@@ -12338,8 +12536,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
  SMLoc ExtLoc = Parser.getTok().getLoc();
  Lex();
- if (parseToken(AsmToken::EndOfStatement,
-                "unexpected token in '.arch_extension' directive"))
+ if (parseEOL())
    return true;
  if (Name == "nocrypto") {
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index c3df7dc88d79..9acd49292268 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -13,8 +13,8 @@
 #include "TargetInfo/ARMTargetInfo.h"
 #include "Utils/ARMBaseInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -175,408 +175,529 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
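The several hundred forward declarations that follow all change in one uniform way: the trailing const void *Decoder parameter becomes a typed const MCDisassembler *Decoder, so decoders no longer have to cast the opaque pointer back before using it (the same cast removal is visible in the tryAddingSymbolicOperand hunk near the end of this file's diff). A minimal sketch of the new shape, assuming a hypothetical DecodeFooRegisterClass with an illustrative body (GPRDecoderTable is the kind of lookup table these decoders use):

// Hypothetical decoder, shown only to illustrate the signature migration.
// The old form ended in "const void *Decoder", and a decoder that needed the
// disassembler began with:
//   const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder);
// With the typed parameter, that cast is no longer needed.
static DecodeStatus DecodeFooRegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
  if (RegNo > 15)
    return MCDisassembler::Fail; // only r0-r15 encode a core register
  Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo]));
  return MCDisassembler::Success;
}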
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPRwithZRnospRegisterClass( - MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler 
*Decoder); static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, - unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus 
DecodeTSBInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, - unsigned Insn, - uint64_t Adddress, - const void *Decoder); +static DecodeStatus +DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, + uint64_t Adddress, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val, - 
uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -template<int 
shift> + uint64_t Address, + const MCDisassembler *Decoder); +template <int shift> static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + 
const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, 
unsigned Insn, - uint64_t Address, const void* Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); -static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder); -static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<int shift> -static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +template <int shift> +static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<int shift> + uint64_t Address, + const MCDisassembler *Decoder); +template <int shift> static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<int shift, int WriteBack> + uint64_t Address, + const MCDisassembler *Decoder); +template <int shift, int WriteBack> static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const 
MCDisassembler *Decoder); static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); -static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); template <bool isSigned, bool isNeg, bool zeroPermitted, int size> static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned 
Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); -template<bool Writeback> + uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +static DecodeStatus +DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); +template <bool Writeback> static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); -template<int shift> + const MCDisassembler *Decoder); +template <int shift> static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<int shift> + uint64_t Address, + const MCDisassembler *Decoder); +template <int shift> static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<int shift> + uint64_t Address, + const MCDisassembler *Decoder); +template <int shift> static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -template<unsigned MinLog, unsigned MaxLog> + uint64_t Address, + const MCDisassembler *Decoder); +template <unsigned MinLog, unsigned MaxLog> static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder); -template<unsigned start> -static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); +template <unsigned start> +static DecodeStatus +DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); + const MCDisassembler *Decoder); static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder); + uint64_t Address, + const 
MCDisassembler *Decoder);
typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val,
-                                    uint64_t Address, const void *Decoder);
-template<bool scalar, OperandDecoder predicate_decoder>
-static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
-                                  uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
-                                  uint64_t Address, const void *Decoder);
+                                    uint64_t Address,
+                                    const MCDisassembler *Decoder);
+template <bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const MCDisassembler *Decoder);
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
-                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
-                                                  uint64_t Address,
-                                                  const void *Decoder);
+                                   uint64_t Address,
+                                   const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
+                              const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
-                                        uint64_t Address, const void *Decoder);
+                                        uint64_t Address,
+                                        const MCDisassembler *Decoder);

#include "ARMGenDisassemblerTables.inc"
@@ -710,11 +831,12 @@ extern const MCInstrDesc ARMInsts[];
/// operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
                                     bool isBranch, uint64_t InstSize,
-                                     MCInst &MI, const void *Decoder) {
-  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+                                     MCInst &MI,
+                                     const MCDisassembler *Decoder) {
  // FIXME: Does it make sense for value to be negative?
-  return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
-                                       /* Offset */ 0, InstSize);
+  return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
+                                           isBranch, /*Offset=*/0, /*OpSize=*/0,
+                                           InstSize);
}

/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
@@ -727,7 +849,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
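The bulk of this patch is this same mechanical signature change applied to every decoder callback in the file. A minimal before/after sketch of the pattern, with a hypothetical decoder name; the feature bit (ARM::FeatureFullFP16) and every API call are ones that already appear in this file, and the sketch assumes the includes and DecodeStatus alias ARMDisassembler.cpp already has:

// Before: the disassembler had to be recovered through an unchecked cast.
static DecodeStatus DecodeExampleOld(MCInst &Inst, unsigned Val,
                                     uint64_t Address, const void *Decoder) {
  const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder);
  if (!Dis->getSubtargetInfo().getFeatureBits()[ARM::FeatureFullFP16])
    return MCDisassembler::Fail;
  Inst.addOperand(MCOperand::createImm(Val));
  return MCDisassembler::Success;
}

// After: the typed parameter makes the cast unnecessary and lets the
// compiler reject a decoder handed the wrong context object.
static DecodeStatus DecodeExampleNew(MCInst &Inst, unsigned Val,
                                     uint64_t Address,
                                     const MCDisassembler *Decoder) {
  if (!Decoder->getSubtargetInfo().getFeatureBits()[ARM::FeatureFullFP16])
    return MCDisassembler::Fail;
  Inst.addOperand(MCOperand::createImm(Val));
  return MCDisassembler::Success;
}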
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, - const void *Decoder) { + const MCDisassembler *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); Dis->tryAddingPcLoadReferenceComment(Value, Address); } @@ -1142,7 +1264,8 @@ static const uint16_t CLRMGPRDecoderTable[] = { }; static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -1153,7 +1276,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -1165,9 +1288,9 @@ static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus -DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1180,7 +1303,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 13) @@ -1192,8 +1315,8 @@ static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1207,8 +1330,8 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 15) @@ -1225,8 +1348,8 @@ DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (RegNo == 13) return MCDisassembler::Fail; @@ -1235,7 +1358,8 @@ DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -1247,7 +1371,8 @@ static const uint16_t GPRPairDecoderTable[] = { }; static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // According to the Arm ARM RegNo = 14 is undefined, but we return fail @@ -1263,8 +1388,9 @@ static DecodeStatus 
DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, return S; } -static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 13) return MCDisassembler::Fail; @@ -1278,7 +1404,7 @@ static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo != 13) return MCDisassembler::Fail; @@ -1288,7 +1414,8 @@ static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Register = 0; switch (RegNo) { case 0: @@ -1318,7 +1445,8 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; const FeatureBitset &featureBits = @@ -1343,7 +1471,8 @@ static const uint16_t SPRDecoderTable[] = { }; static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1353,7 +1482,8 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder); } @@ -1369,7 +1499,8 @@ static const uint16_t DPRDecoderTable[] = { }; static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); @@ -1384,22 +1515,24 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); } static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus -DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -1413,7 +1546,8 @@ static const uint16_t QPRDecoderTable[] = { }; static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const 
MCDisassembler *Decoder) { if (RegNo > 31 || (RegNo & 1) != 0) return MCDisassembler::Fail; RegNo >>= 1; @@ -1433,7 +1567,8 @@ static const uint16_t DPairDecoderTable[] = { }; static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 30) return MCDisassembler::Fail; @@ -1453,10 +1588,9 @@ static const uint16_t DPairSpacedDecoderTable[] = { ARM::D28_D30, ARM::D29_D31 }; -static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 29) return MCDisassembler::Fail; @@ -1466,7 +1600,8 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, } static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -1483,7 +1618,8 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val) Inst.addOperand(MCOperand::createReg(ARM::CPSR)); else @@ -1492,7 +1628,8 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -1529,7 +1666,8 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -1564,7 +1702,8 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; bool NeedDisjointWriteback = false; @@ -1611,7 +1750,8 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); @@ -1635,7 +1775,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); @@ -1660,7 +1801,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // This 
operand encodes a mask of contiguous zeros between a specified MSB
  // and LSB. To decode it, we create the mask of all bits MSB-and-lower,
  // the mask of all bits LSB-and-lower, and then xor them to create
@@ -1687,7 +1829,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
}

static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
-                                            uint64_t Address, const void *Decoder) {
+                                            uint64_t Address,
+                                            const MCDisassembler *Decoder) {
  DecodeStatus S = MCDisassembler::Success;

  unsigned pred = fieldFromInstruction(Insn, 28, 4);
@@ -1865,8 +2008,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
}

static DecodeStatus
-DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
-                              uint64_t Address, const void *Decoder) {
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+                              const MCDisassembler *Decoder) {
  DecodeStatus S = MCDisassembler::Success;

  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -1971,7 +2114,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
}

static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
-                                          uint64_t Address, const void *Decoder) {
+                                          uint64_t Address,
+                                          const MCDisassembler *Decoder) {
  DecodeStatus S = MCDisassembler::Success;

  unsigned Rn = fieldFromInstruction(Val, 13, 4);
@@ -2013,9 +2157,22 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
  return S;
}

-static DecodeStatus
-DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
-                           uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address,
+                                         const MCDisassembler *Decoder) {
+  if (Inst.getOpcode() != ARM::TSB && Inst.getOpcode() != ARM::t2TSB)
+    return MCDisassembler::Fail;
+
+  // The "csync" operand is not encoded into the "tsb" instruction (as this is
+  // the only available operand), but LLVM expects the instruction to have one
+  // operand, so we need to add the csync when decoding.
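Because "csync" is implied by the mnemonic rather than by any encoding bits, the decoder synthesizes the operand, as the next added line shows. A standalone sketch of this implicit-operand pattern (hypothetical decoder name; it uses only the MCInst/MCOperand API and the ARM_TSB::CSYNC immediate seen in this hunk):

// No bits of Insn select the operand: it is fixed by the mnemonic, but the
// MCInst still needs it so the printer can emit "tsb csync" and assembly
// round-trips cleanly through the assembler and disassembler.
static DecodeStatus DecodeImplicitOperandExample(MCInst &Inst, unsigned Insn,
                                                 uint64_t Address,
                                                 const MCDisassembler *Decoder) {
  Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC));
  return MCDisassembler::Success;
}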
+ Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -2206,7 +2363,8 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -2235,7 +2393,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -2257,9 +2416,10 @@ static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -2350,7 +2510,8 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, // Check for UNPREDICTABLE predicated ESB instruction static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned pred = fieldFromInstruction(Insn, 28, 4); unsigned imm8 = fieldFromInstruction(Insn, 0, 8); const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); @@ -2372,7 +2533,8 @@ static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 18, 2); unsigned M = fieldFromInstruction(Insn, 17, 1); unsigned iflags = fieldFromInstruction(Insn, 6, 3); @@ -2419,7 +2581,8 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 9, 2); unsigned M = fieldFromInstruction(Insn, 8, 1); unsigned iflags = fieldFromInstruction(Insn, 5, 3); @@ -2460,9 +2623,9 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = fieldFromInstruction(Insn, 0, 8); unsigned Opcode = ARM::t2HINT; @@ -2486,7 +2649,8 @@ static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = 
MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 8, 4); @@ -2510,7 +2674,8 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -2537,7 +2702,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 16, 4); @@ -2565,7 +2731,8 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Pred = fieldFromInstruction(Insn, 28, 4); @@ -2586,7 +2753,8 @@ static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Imm = fieldFromInstruction(Insn, 9, 1); @@ -2614,7 +2782,8 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned add = fieldFromInstruction(Val, 12, 1); @@ -2634,7 +2803,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -2654,7 +2824,8 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -2674,13 +2845,14 @@ static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } -static DecodeStatus -DecodeT2BInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus Status = MCDisassembler::Success; // Note the J1 and J2 values are from the encoded instruction. 
So here @@ -2705,9 +2877,9 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn, return Status; } -static DecodeStatus -DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 28, 4); @@ -2736,7 +2908,8 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Val, 0, 4); @@ -2753,7 +2926,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3029,7 +3203,8 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned type = fieldFromInstruction(Insn, 8, 4); unsigned align = fieldFromInstruction(Insn, 4, 2); if (type == 6 && (align & 2)) return MCDisassembler::Fail; @@ -3042,7 +3217,8 @@ static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3057,7 +3233,8 @@ static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3070,7 +3247,8 @@ static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned size = fieldFromInstruction(Insn, 6, 2); if (size == 3) return MCDisassembler::Fail; @@ -3080,7 +3258,8 @@ static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3350,7 +3529,8 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3397,7 +3577,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, 
const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3445,7 +3626,8 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3480,7 +3662,8 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3531,9 +3714,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3577,9 +3760,9 @@ DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) | @@ -3607,7 +3790,8 @@ DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = fieldFromInstruction(Insn, 13, 3); @@ -3632,7 +3816,8 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3651,31 +3836,36 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(8 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(16 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(32 - Val)); return MCDisassembler::Success; } static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm(64 - 
Val)); return MCDisassembler::Success; } static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -3711,7 +3901,8 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned dst = fieldFromInstruction(Insn, 8, 3); @@ -3735,7 +3926,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1))); @@ -3743,7 +3935,8 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val))); @@ -3751,7 +3944,8 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(Val << 1)); @@ -3759,7 +3953,8 @@ static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 0, 3); @@ -3774,7 +3969,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 0, 3); @@ -3788,7 +3984,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = Val << 2; Inst.addOperand(MCOperand::createImm(imm)); @@ -3798,7 +3995,8 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createReg(ARM::SP)); Inst.addOperand(MCOperand::createImm(Val)); @@ -3806,7 +4004,8 @@ static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler 
*Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 6, 4); @@ -3835,7 +4034,8 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -3918,7 +4118,8 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4002,7 +4203,8 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4081,8 +4283,8 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4121,7 +4323,8 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, - uint64_t Address, const void* Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4173,8 +4376,8 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { if (Val == 0) Inst.addOperand(MCOperand::createImm(INT32_MIN)); else { @@ -4188,7 +4391,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Val == 0) Inst.addOperand(MCOperand::createImm(INT32_MIN)); else { @@ -4203,7 +4406,8 @@ static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, } static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -4219,7 +4423,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4233,8 +4437,9 @@ static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus 
DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4248,8 +4453,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { int imm = Val & 0xFF; if (Val == 0) imm = INT32_MIN; @@ -4260,9 +4465,9 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -template<int shift> -static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +template <int shift> +static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { int imm = Val & 0x7F; if (Val == 0) imm = INT32_MIN; @@ -4276,7 +4481,8 @@ static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); @@ -4321,10 +4527,10 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, return S; } -template<int shift> +template <int shift> static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 3); @@ -4338,10 +4544,10 @@ static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, return S; } -template<int shift, int WriteBack> +template <int shift, int WriteBack> static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 8, 4); @@ -4358,7 +4564,8 @@ static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4419,7 +4626,8 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 13, 4); @@ -4445,7 +4653,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imm = fieldFromInstruction(Insn, 0, 7); Inst.addOperand(MCOperand::createReg(ARM::SP)); @@ -4456,7 +4665,8 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Inst.getOpcode() == 
ARM::tADDrSP) { @@ -4481,7 +4691,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2; unsigned flags = fieldFromInstruction(Insn, 0, 3); @@ -4492,7 +4703,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, } static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction(Insn, 0, 4); unsigned add = fieldFromInstruction(Insn, 4, 1); @@ -4505,7 +4717,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 3, 4); unsigned Qm = fieldFromInstruction(Insn, 0, 3); @@ -4518,9 +4731,10 @@ static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, return S; } -template<int shift> +template <int shift> static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qm = fieldFromInstruction(Insn, 8, 3); int imm = fieldFromInstruction(Insn, 0, 7); @@ -4542,7 +4756,8 @@ static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // Val is passed in as S:J1:J2:imm10H:imm10L:'0' // Note only one trailing zero not two. Also the J1 and J2 values are from // the encoded instruction. So here change to I1 and I2 values via: @@ -4566,7 +4781,8 @@ static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -4580,9 +4796,9 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus -DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); DecodeStatus S = MCDisassembler::Success; @@ -4598,9 +4814,9 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 22, 4); @@ -4641,8 +4857,8 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. 
These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation. -static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned ctrl = fieldFromInstruction(Val, 10, 2); if (ctrl == 0) { unsigned byte = fieldFromInstruction(Val, 8, 2); @@ -4672,9 +4888,9 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus -DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, + uint64_t Address, + const MCDisassembler *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1))); @@ -4683,7 +4899,7 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { // Val is passed in as S:J1:J2:imm10:imm11 // Note no trailing zero after imm11. Also the J1 and J2 values are from // the encoded instruction. So here change to I1 and I2 values via: @@ -4706,7 +4922,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val & ~0xf) return MCDisassembler::Fail; @@ -4715,7 +4932,8 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (Val & ~0xf) return MCDisassembler::Fail; @@ -4723,8 +4941,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; const FeatureBitset &FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); @@ -4825,7 +5043,8 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { unsigned R = fieldFromInstruction(Val, 5, 1); unsigned SysM = fieldFromInstruction(Val, 0, 5); @@ -4840,7 +5059,8 @@ static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -4862,7 +5082,7 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = 
fieldFromInstruction(Insn, 12, 4); @@ -4887,7 +5107,8 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4912,7 +5133,8 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4939,7 +5161,8 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4964,7 +5187,8 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -4988,8 +5212,8 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5055,8 +5279,8 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5120,8 +5344,8 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5187,8 +5411,8 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5250,8 +5474,8 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5320,8 +5544,8 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, return S; 
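Every lane load/store decoder in this stretch peels register and immediate fields out of the instruction word with fieldFromInstruction(Insn, lsb, width). A self-contained sketch of that bit-extraction idiom (hypothetical helper name, assuming Width < 32; the real helper comes with the generated decoder tables included earlier in the file):

#include <cstdint>

// Extract Width bits of Insn starting at bit LSB, e.g. bits [19:16] for Rn.
static uint32_t extractField(uint32_t Insn, unsigned LSB, unsigned Width) {
  return (Insn >> LSB) & ((1u << Width) - 1u); // requires Width < 32
}

// Usage mirroring the decoders above:
//   unsigned Rn = extractField(Insn, 16, 4); // base register field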
} -static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5383,8 +5607,8 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5464,8 +5688,8 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5536,8 +5760,8 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -5562,8 +5786,8 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -5588,8 +5812,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction(Insn, 4, 4); unsigned mask = fieldFromInstruction(Insn, 0, 4); @@ -5617,9 +5841,9 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -5654,9 +5878,9 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus -DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 12, 4); @@ -5689,8 +5913,8 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, - uint64_t Address, 
const void *Decoder) { +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned sign1 = fieldFromInstruction(Insn, 21, 1); unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; @@ -5717,7 +5941,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // Shift of "asr #32" is not allowed in Thumb2 mode. @@ -5726,8 +5950,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, return S; } -static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction(Insn, 0, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -5753,8 +5977,8 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; @@ -5812,8 +6036,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; @@ -5871,10 +6095,10 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0); @@ -5904,8 +6128,8 @@ static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, return S; } -static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 16, 4); @@ -5932,7 +6156,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, } static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned CRm = fieldFromInstruction(Val, 0, 4); @@ -5978,7 +6203,7 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler 
*Decoder) { const FeatureBitset &featureBits = ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); DecodeStatus S = MCDisassembler::Success; @@ -6030,7 +6255,7 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, template <bool isSigned, bool isNeg, bool zeroPermitted, int size> static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0 && !zeroPermitted) S = MCDisassembler::Fail; @@ -6049,7 +6274,7 @@ static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { uint64_t LocImm = Inst.getOperand(0).getImm(); Val = LocImm + (2 << Val); @@ -6061,7 +6286,7 @@ static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (Val >= ARMCC::AL) // also exclude the non-condition NV return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(Val)); @@ -6069,7 +6294,7 @@ static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Inst.getOpcode() == ARM::MVE_LCTP) @@ -6132,7 +6357,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val == 0) @@ -6144,7 +6369,8 @@ static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if ((RegNo) + 1 > 11) return MCDisassembler::Fail; @@ -6154,7 +6380,8 @@ static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if ((RegNo) > 14) return MCDisassembler::Fail; @@ -6165,7 +6392,8 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo == 15) { Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV)); return MCDisassembler::Success; @@ -6181,7 +6409,7 @@ DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createImm(ARMCC::AL)); @@ -6207,8 +6435,8 @@ static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { if (RegNo > 7) return 
MCDisassembler::Fail; @@ -6224,7 +6452,7 @@ static const uint16_t QQPRDecoderTable[] = { static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 6) return MCDisassembler::Fail; @@ -6240,7 +6468,7 @@ static const uint16_t QQQQPRDecoderTable[] = { static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { if (RegNo > 4) return MCDisassembler::Fail; @@ -6251,7 +6479,7 @@ static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; // Parse VPT mask and encode it in the MCInst as an immediate with the same @@ -6281,7 +6509,8 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { // The vpred_r operand type includes an MQPR register field derived // from the encoding. But we don't actually want to add an operand // to the MCInst at this stage, because AddThumbPredicate will do it @@ -6292,18 +6521,16 @@ static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE)); return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Code; switch (Val & 0x3) { case 0: @@ -6323,17 +6550,16 @@ static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? 
ARMCC::HS : ARMCC::HI)); return MCDisassembler::Success; } -static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { +static DecodeStatus +DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder) { unsigned Code; switch (Val) { default: @@ -6363,7 +6589,8 @@ static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Va } static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned DecodedVal = 64 - Val; @@ -6404,10 +6631,10 @@ static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) { } } -template<bool Writeback> +template <bool Writeback> static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { switch (Inst.getOpcode()) { case ARM::VSTR_FPSCR_pre: case ARM::VSTR_FPSCR_NZCVQC_pre: @@ -6448,9 +6675,10 @@ static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val, return S; } -static inline DecodeStatus DecodeMVE_MEM_pre( - MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder, - unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) { +static inline DecodeStatus +DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address, + const MCDisassembler *Decoder, unsigned Rn, + OperandDecoder RnDecoder, OperandDecoder AddrDecoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = fieldFromInstruction(Val, 13, 3); @@ -6469,7 +6697,8 @@ static inline DecodeStatus DecodeMVE_MEM_pre( template <int shift> static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 16, 3), DecodetGPRRegisterClass, @@ -6478,7 +6707,8 @@ static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val, template <int shift> static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 16, 4), DecoderGPRRegisterClass, @@ -6487,17 +6717,18 @@ static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val, template <int shift> static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder, fieldFromInstruction(Val, 17, 3), DecodeMQPRRegisterClass, DecodeMveAddrModeQ<shift>); } -template<unsigned MinLog, unsigned MaxLog> +template <unsigned MinLog, unsigned MaxLog> static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; if (Val < MinLog || Val > MaxLog) @@ -6507,10 +6738,10 @@ static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val, return S; } -template<unsigned start> -static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { +template <unsigned start> +static DecodeStatus +DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address, + const 
MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createImm(start + Val)); @@ -6519,7 +6750,8 @@ static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 0, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -6542,7 +6774,8 @@ static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction(Insn, 0, 4); unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); @@ -6566,8 +6799,9 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMVEOverlappingLongShift( - MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { +static DecodeStatus +DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1; @@ -6645,8 +6879,9 @@ static DecodeStatus DecodeMVEOverlappingLongShift( return S; } -static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) | fieldFromInstruction(Insn, 13, 3)); @@ -6664,9 +6899,9 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Addr return S; } -template<bool scalar, OperandDecoder predicate_decoder> +template <bool scalar, OperandDecoder predicate_decoder> static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); unsigned Qn = fieldFromInstruction(Insn, 17, 3); @@ -6703,7 +6938,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, } static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); unsigned Rn = fieldFromInstruction(Insn, 16, 4); @@ -6712,8 +6947,9 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, return S; } -static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, + uint64_t Address, + const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); Inst.addOperand(MCOperand::createReg(ARM::VPR)); @@ -6721,7 +6957,8 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address } static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder) { + uint64_t Address, + const MCDisassembler *Decoder) { const unsigned Rd = fieldFromInstruction(Insn, 8, 
4); const unsigned Rn = fieldFromInstruction(Insn, 16, 4); const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 | diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 16bc0ca179a7..d74da27fbc4f 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" @@ -98,9 +98,20 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; - void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; + void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override; + void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override; + void emitARMWinCFISaveSP(unsigned Reg) override; + void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override; + void emitARMWinCFISaveLR(unsigned Offset) override; + void emitARMWinCFIPrologEnd(bool Fragment) override; + void emitARMWinCFINop(bool Wide) override; + void emitARMWinCFIEpilogStart(unsigned Condition) override; + void emitARMWinCFIEpilogEnd() override; + void emitARMWinCFICustom(unsigned Opcode) override; + public: ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter &InstPrinter, bool VerboseAsm); @@ -239,8 +250,8 @@ void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { void ARMTargetAsmStreamer::finishAttributeSection() {} -void -ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { +void ARMTargetAsmStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName() << "\n"; } @@ -269,6 +280,101 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, OS << '\n'; } +void ARMTargetAsmStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) { + if (Wide) + OS << "\t.seh_stackalloc_w\t" << Size << "\n"; + else + OS << "\t.seh_stackalloc\t" << Size << "\n"; +} + +static void printRegs(formatted_raw_ostream &OS, ListSeparator &LS, int First, + int Last) { + if (First != Last) + OS << LS << "r" << First << "-r" << Last; + else + OS << LS << "r" << First; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) { + if (Wide) + OS << "\t.seh_save_regs_w\t"; + else + OS << "\t.seh_save_regs\t"; + ListSeparator LS; + int First = -1; + OS << "{"; + for (int I = 0; I <= 12; I++) { + if (Mask & (1 << I)) { + if (First < 0) + First = I; + } else { + if (First >= 0) { + printRegs(OS, LS, First, I - 1); + First = -1; + } + } + } + if (First >= 0) + printRegs(OS, LS, First, 12); + if (Mask & (1 << 14)) + OS << LS << "lr"; + OS << "}\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveSP(unsigned Reg) { + OS << "\t.seh_save_sp\tr" << Reg << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFISaveFRegs(unsigned First, + unsigned Last) { + if (First != Last) + OS << "\t.seh_save_fregs\t{d" << First << "-d" << Last << "}\n"; + else + OS << "\t.seh_save_fregs\t{d" << First << "}\n"; +} + +void 
ARMTargetAsmStreamer::emitARMWinCFISaveLR(unsigned Offset) { + OS << "\t.seh_save_lr\t" << Offset << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIPrologEnd(bool Fragment) { + if (Fragment) + OS << "\t.seh_endprologue_fragment\n"; + else + OS << "\t.seh_endprologue\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFINop(bool Wide) { + if (Wide) + OS << "\t.seh_nop_w\n"; + else + OS << "\t.seh_nop\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIEpilogStart(unsigned Condition) { + if (Condition == ARMCC::AL) + OS << "\t.seh_startepilogue\n"; + else + OS << "\t.seh_startepilogue_cond\t" + << ARMCondCodeToString(static_cast<ARMCC::CondCodes>(Condition)) << "\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFIEpilogEnd() { + OS << "\t.seh_endepilogue\n"; +} + +void ARMTargetAsmStreamer::emitARMWinCFICustom(unsigned Opcode) { + int I; + for (I = 3; I > 0; I--) + if (Opcode & (0xffu << (8 * I))) + break; + ListSeparator LS; + OS << "\t.seh_custom\t"; + for (; I >= 0; I--) + OS << LS << ((Opcode >> (8 * I)) & 0xff); + OS << "\n"; +} + class ARMTargetELFStreamer : public ARMTargetStreamer { private: StringRef CurrentVendor; @@ -309,7 +415,7 @@ private: void finishAttributeSection() override; void emitLabel(MCSymbol *Symbol) override; - void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; // Reset state between object emissions @@ -984,8 +1090,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { Streamer.emitThumbFunc(Symbol); } -void -ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { +void ARMTargetELFStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *S) { getStreamer().EmitFixup(S, FK_Data_4); } @@ -1057,7 +1163,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section - SwitchSection(EHSection); + switchSection(EHSection); emitValueToAlignment(4, 0, 1, 0); } @@ -1150,7 +1256,7 @@ void ARMELFStreamer::emitFnEnd() { } // Switch to the section containing FnStart - SwitchSection(&FnStart->getSection()); + switchSection(&FnStart->getSection()); // Clean exception handling frame information EHReset(); @@ -1369,12 +1475,8 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { return new ARMTargetStreamer(S); } -MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, - const MCSubtargetInfo &STI) { - const Triple &TT = STI.getTargetTriple(); - if (TT.isOSBinFormatELF()) - return new ARMTargetELFStreamer(S); - return new ARMTargetStreamer(S); +MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S) { + return new ARMTargetELFStreamer(S); } MCELFStreamer *createARMELFStreamer(MCContext &Context, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 77c0e3522911..febd8ab8bbc0 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::WinEH; + WinEHEncodingType = WinEH::EncodingType::Itanium; PrivateGlobalPrefix = "$M"; PrivateLabelPrefix = "$M"; CommentString = "@"; @@ -110,7 +111,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { PrivateLabelPrefix = 
".L"; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::DwarfCFI; + ExceptionsType = ExceptionHandling::WinEH; + WinEHEncodingType = WinEH::EncodingType::Itanium; UseParensForSymbolVariant = true; DwarfRegNumForCFI = false; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5ecacdab390f..c33bbfcc7114 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -2006,13 +2006,11 @@ getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx, #include "ARMGenMCCodeEmitter.inc" MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, true); } MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, false); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 17ca1866cf95..3f1379f135d1 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -87,18 +87,6 @@ static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, return false; } -static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, - std::string &Info) { - if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() && - MI.getOperand(1).getImm() != 8) { - Info = "applying IT instruction to more than one subsequent instruction is " - "deprecated"; - return true; - } - - return false; -} - static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5c8f9bfdca08..e0c992f4fae2 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -71,13 +71,13 @@ MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, bool isVerboseAsm); MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI); +MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S); +MCTargetStreamer *createARMObjectTargetWinCOFFStreamer(MCStreamer &S); MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index ed4000c7e5be..0ea51839824b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ScopedPrinter.h" using namespace llvm; @@ -149,7 +149,7 @@ 
RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, if (FixupOffset & 0xff000000) { Asm.getContext().reportError(Fixup.getLoc(), "can not encode offset '0x" + - to_hexString(FixupOffset) + + utohexstr(FixupOffset) + "' in resulting scattered relocation."); return; } @@ -264,7 +264,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, if (FixupOffset & 0xff000000) { Asm.getContext().reportError(Fixup.getLoc(), "can not encode offset '0x" + - to_hexString(FixupOffset) + + utohexstr(FixupOffset) + "' in resulting scattered relocation."); return; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 02a2d01176fc..16d1ae62053e 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -114,15 +114,28 @@ void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {} void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {} void ARMTargetStreamer::emitFPU(unsigned FPU) {} void ARMTargetStreamer::finishAttributeSection() {} -void -ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} +void ARMTargetStreamer::annotateTLSDescriptorSequence( + const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} +void ARMTargetStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {} +void ARMTargetStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {} +void ARMTargetStreamer::emitARMWinCFISaveSP(unsigned Reg) {} +void ARMTargetStreamer::emitARMWinCFISaveFRegs(unsigned First, unsigned Last) {} +void ARMTargetStreamer::emitARMWinCFISaveLR(unsigned Offset) {} +void ARMTargetStreamer::emitARMWinCFINop(bool Wide) {} +void ARMTargetStreamer::emitARMWinCFIPrologEnd(bool Fragment) {} +void ARMTargetStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {} +void ARMTargetStreamer::emitARMWinCFIEpilogEnd() {} +void ARMTargetStreamer::emitARMWinCFICustom(unsigned Opcode) {} + static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { if (STI.getCPU() == "xscale") return ARMBuildAttrs::v5TEJ; - if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::HasV9_0aOps)) + return ARMBuildAttrs::v9_A; + else if (STI.hasFeature(ARM::HasV8Ops)) { if (STI.hasFeature(ARM::FeatureRClass)) return ARMBuildAttrs::v8_R; return ARMBuildAttrs::v8_A; @@ -305,3 +318,13 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI); } } + +MCTargetStreamer * +llvm::createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + const Triple &TT = STI.getTargetTriple(); + if (TT.isOSBinFormatELF()) + return createARMObjectTargetELFStreamer(S); + if (TT.isOSBinFormatCOFF()) + return createARMObjectTargetWinCOFFStreamer(S); + return new ARMTargetStreamer(S); +} diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index e6f649164a29..cdd7f6fb715a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -8,30 +8,59 @@ #include "ARMMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinCOFFStreamer.h" using namespace 
llvm; namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { + Win64EH::ARMUnwindEmitter EHStreamer; + public: ARMWinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB, std::unique_ptr<MCCodeEmitter> CE, std::unique_ptr<MCObjectWriter> OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} + void emitWinEHHandlerData(SMLoc Loc) override; + void emitWindowsUnwindTables() override; + void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void emitThumbFunc(MCSymbol *Symbol) override; void finishImpl() override; }; +void ARMWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) { + MCStreamer::emitWinEHHandlerData(Loc); + + // We have to emit the unwind info now, because this directive + // actually switches to the .xdata section! + EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(), + /* HandlerData = */ true); +} + +void ARMWinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) { + EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); +} + +void ARMWinCOFFStreamer::emitWindowsUnwindTables() { + if (!getNumWinFrameInfos()) + return; + EHStreamer.Emit(*this); +} + void ARMWinCOFFStreamer::emitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } void ARMWinCOFFStreamer::finishImpl() { emitFrames(nullptr); + emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); } @@ -48,3 +77,201 @@ MCStreamer *llvm::createARMWinCOFFStreamer( return S; } +namespace { +class ARMTargetWinCOFFStreamer : public llvm::ARMTargetStreamer { +private: + // True if we are processing SEH directives in an epilogue. + bool InEpilogCFI = false; + + // Symbol of the current epilog for which we are processing SEH directives. + MCSymbol *CurrentEpilog = nullptr; + +public: + ARMTargetWinCOFFStreamer(llvm::MCStreamer &S) : ARMTargetStreamer(S) {} + + // The unwind codes on ARM Windows are documented at + // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling + void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override; + void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override; + void emitARMWinCFISaveSP(unsigned Reg) override; + void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override; + void emitARMWinCFISaveLR(unsigned Offset) override; + void emitARMWinCFIPrologEnd(bool Fragment) override; + void emitARMWinCFINop(bool Wide) override; + void emitARMWinCFIEpilogStart(unsigned Condition) override; + void emitARMWinCFIEpilogEnd() override; + void emitARMWinCFICustom(unsigned Opcode) override; + +private: + void emitARMWinUnwindCode(unsigned UnwindCode, int Reg, int Offset); +}; + +// Helper function to common out unwind code setup for those codes that can +// belong to both prolog and epilog. 
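// For example (schematic; directive spellings as printed by the asm streamer
// earlier in this patch), a sequence such as
//     .seh_startepilogue
//     .seh_save_regs {r4-r7, lr}
//     .seh_endepilogue
// runs with InEpilogCFI set, so its UOP_SaveRegsR4R7LR code is appended to
// EpilogMap[CurrentEpilog].Instructions; the same directive seen before
// .seh_endprologue lands in the frame's main Instructions list instead.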
+void ARMTargetWinCOFFStreamer::emitARMWinUnwindCode(unsigned UnwindCode, + int Reg, int Offset) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + MCSymbol *Label = S.emitCFILabel(); + auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset); + if (InEpilogCFI) + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); + else + CurFrame->Instructions.push_back(Inst); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIAllocStack(unsigned Size, + bool Wide) { + unsigned Op = Win64EH::UOP_AllocSmall; + if (!Wide) { + if (Size / 4 > 0xffff) + Op = Win64EH::UOP_AllocHuge; + else if (Size / 4 > 0x7f) + Op = Win64EH::UOP_AllocLarge; + } else { + Op = Win64EH::UOP_WideAllocMedium; + if (Size / 4 > 0xffff) + Op = Win64EH::UOP_WideAllocHuge; + else if (Size / 4 > 0x3ff) + Op = Win64EH::UOP_WideAllocLarge; + } + emitARMWinUnwindCode(Op, -1, Size); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveRegMask(unsigned Mask, + bool Wide) { + assert(Mask != 0); + int Lr = (Mask & 0x4000) ? 1 : 0; + Mask &= ~0x4000; + if (Wide) + assert((Mask & ~0x1fff) == 0); + else + assert((Mask & ~0x00ff) == 0); + if (Mask && ((Mask + (1 << 4)) & Mask) == 0) { + if (Wide && (Mask & 0x1000) == 0 && (Mask & 0xff) == 0xf0) { + // One continuous range from r4 to r8-r11 + for (int I = 11; I >= 8; I--) { + if (Mask & (1 << I)) { + emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegsR4R11LR, I, Lr); + return; + } + } + // If it actually was from r4 to r4-r7, continue below. + } else if (!Wide) { + // One continuous range from r4 to r4-r7 + for (int I = 7; I >= 4; I--) { + if (Mask & (1 << I)) { + emitARMWinUnwindCode(Win64EH::UOP_SaveRegsR4R7LR, I, Lr); + return; + } + } + llvm_unreachable("logic error"); + } + } + Mask |= Lr << 14; + if (Wide) + emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegMask, Mask, 0); + else + emitARMWinUnwindCode(Win64EH::UOP_SaveRegMask, Mask, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveSP(unsigned Reg) { + emitARMWinUnwindCode(Win64EH::UOP_SaveSP, Reg, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveFRegs(unsigned First, + unsigned Last) { + assert(First <= Last); + assert(First >= 16 || Last < 16); + assert(First <= 31 && Last <= 31); + if (First == 8) + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD8D15, Last, 0); + else if (First <= 15) + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD0D15, First, Last); + else + emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD16D31, First, Last); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFISaveLR(unsigned Offset) { + emitARMWinUnwindCode(Win64EH::UOP_SaveLR, 0, Offset); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFINop(bool Wide) { + if (Wide) + emitARMWinUnwindCode(Win64EH::UOP_WideNop, -1, 0); + else + emitARMWinUnwindCode(Win64EH::UOP_Nop, -1, 0); +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIPrologEnd(bool Fragment) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + MCSymbol *Label = S.emitCFILabel(); + CurFrame->PrologEnd = Label; + WinEH::Instruction Inst = + WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0); + auto it = CurFrame->Instructions.begin(); + CurFrame->Instructions.insert(it, Inst); + CurFrame->Fragment = Fragment; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogStart(unsigned Condition) { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + InEpilogCFI = true; + 
CurrentEpilog = S.emitCFILabel(); + CurFrame->EpilogMap[CurrentEpilog].Condition = Condition; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() { + auto &S = getStreamer(); + WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc()); + if (!CurFrame) + return; + + if (!CurrentEpilog) { + S.getContext().reportError(SMLoc(), "Stray .seh_endepilogue in " + + CurFrame->Function->getName()); + return; + } + + std::vector<WinEH::Instruction> &Epilog = + CurFrame->EpilogMap[CurrentEpilog].Instructions; + + unsigned UnwindCode = Win64EH::UOP_End; + if (!Epilog.empty()) { + WinEH::Instruction EndInstr = Epilog.back(); + if (EndInstr.Operation == Win64EH::UOP_Nop) { + UnwindCode = Win64EH::UOP_EndNop; + Epilog.pop_back(); + } else if (EndInstr.Operation == Win64EH::UOP_WideNop) { + UnwindCode = Win64EH::UOP_WideEndNop; + Epilog.pop_back(); + } + } + + InEpilogCFI = false; + WinEH::Instruction Inst = WinEH::Instruction(UnwindCode, nullptr, -1, 0); + CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst); + MCSymbol *Label = S.emitCFILabel(); + CurFrame->EpilogMap[CurrentEpilog].End = Label; + CurrentEpilog = nullptr; +} + +void ARMTargetWinCOFFStreamer::emitARMWinCFICustom(unsigned Opcode) { + emitARMWinUnwindCode(Win64EH::UOP_Custom, 0, Opcode); +} + +} // end anonymous namespace + +MCTargetStreamer *llvm::createARMObjectTargetWinCOFFStreamer(MCStreamer &S) { + return new ARMTargetWinCOFFStreamer(S); +} diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index cfd275bc0621..30785340ef12 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -145,7 +145,8 @@ private: // Optimise the base and offsets of the given address bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI); // Try to fold consecutive geps together into one - Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder); + Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale, + IRBuilder<> &Builder); // Check whether these offsets could be moved out of the loop they're in bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI); // Pushes the given add out of the loop @@ -390,7 +391,7 @@ MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) { return ReturnFalse; // Check that the constant is small enough for an incrementing gather - int64_t Immediate = Const.getValue() << TypeScale; + int64_t Immediate = *Const << TypeScale; if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0) return ReturnFalse; @@ -964,7 +965,7 @@ static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) { bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI) { - LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n" + LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize: " << *Offsets << "\n"); // Optimise the addresses of gathers/scatters by moving invariant // calculations out of the loop @@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, return true; } -static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, - IRBuilder<> &Builder) { +static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y, + unsigned ScaleY, IRBuilder<> &Builder) { // Splat the non-vector value to a vector of the given type - if the value is // a constant (and its value isn't too big), we can even use this 
opportunity // to scale it to the size of the vector elements @@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, ConstantInt *ConstYEl = dyn_cast<ConstantInt>(ConstY->getAggregateElement(i)); if (!ConstXEl || !ConstYEl || - ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >= + ConstXEl->getZExtValue() * ScaleX + + ConstYEl->getZExtValue() * ScaleY >= (unsigned)(1 << (TargetElemSize - 1))) return nullptr; } } - Value *Add = Builder.CreateAdd(X, Y); + Value *XScale = Builder.CreateVectorSplat( + XElType->getNumElements(), + Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX)); + Value *YScale = Builder.CreateVectorSplat( + YElType->getNumElements(), + Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY)); + Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale), + Builder.CreateMul(Y, YScale)); - FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType()); - if (checkOffsetSize(Add, GEPType->getNumElements())) + if (checkOffsetSize(Add, XElType->getNumElements())) return Add; else return nullptr; } Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP, - Value *&Offsets, + Value *&Offsets, unsigned &Scale, IRBuilder<> &Builder) { Value *GEPPtr = GEP->getPointerOperand(); Offsets = GEP->getOperand(1); + Scale = DL->getTypeAllocSize(GEP->getSourceElementType()); // We only merge geps with constant offsets, because only for those // we can make sure that we do not cause an overflow - if (!isa<Constant>(Offsets)) + if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets)) return nullptr; - GetElementPtrInst *BaseGEP; - if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) { + if (GetElementPtrInst *BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr)) { // Merge the two geps into one - Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder); + Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder); if (!BaseBasePtr) return nullptr; - Offsets = - CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder); + Offsets = CheckAndCreateOffsetAdd( + Offsets, Scale, GEP->getOperand(1), + DL->getTypeAllocSize(GEP->getSourceElementType()), Builder); if (Offsets == nullptr) return nullptr; + Scale = 1; // Scale is always an i8 at this point. return BaseBasePtr; } return GEPPtr; @@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB, Builder.SetInsertPoint(GEP); Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); Value *Offsets; - Value *Base = foldGEP(GEP, Offsets, Builder); + unsigned Scale; + Value *Base = foldGEP(GEP, Offsets, Scale, Builder); // We only want to merge the geps if there is a real chance that they can be // used by an MVE gather; thus the offset has to have the correct size // (always i32 if it is not of vector type) and the base has to be a // pointer. 
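// Schematically (hypothetical IR; the bitcasts the code inserts are omitted),
// the fold turns a constant-offset GEP chain
//     %a = getelementptr i32, i32* %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//     %g = getelementptr i16, i16* %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>
// into one byte-indexed GEP with the element sizes folded into the offsets:
//     %g = getelementptr i8, i8* %base, <4 x i32> <i32 16, i32 20, i32 24, i32 28>
// which is why Scale is expected to be 1 in the assert just below.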
if (Offsets && Base && Base != GEP) { + assert(Scale == 1 && "Expected to fold GEP to a scale of 1"); + Type *BaseTy = Builder.getInt8PtrTy(); + if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType())) + BaseTy = FixedVectorType::get(BaseTy, VecTy); GetElementPtrInst *NewAddress = GetElementPtrInst::Create( - GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP); - GEP->replaceAllUsesWith(NewAddress); + Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets, + "gep.merged", GEP); + LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP + << "\n new : " << *NewAddress << "\n"); + GEP->replaceAllUsesWith( + Builder.CreateBitCast(NewAddress, GEP->getType())); GEP = NewAddress; Changed = true; } diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp index 538bd10685b0..3e76efb5133f 100644 --- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp +++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp @@ -45,6 +45,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -176,9 +177,8 @@ static bool tryInterleave(Instruction *Start, // Truncs case Instruction::Trunc: case Instruction::FPTrunc: - if (Truncs.count(I)) + if (!Truncs.insert(I)) continue; - Truncs.insert(I); Visited.insert(I); break; @@ -235,9 +235,8 @@ static bool tryInterleave(Instruction *Start, case Instruction::FAdd: case Instruction::FMul: case Instruction::Select: - if (Ops.count(I)) + if (!Ops.insert(I)) continue; - Ops.insert(I); for (Use &Op : I->operands()) { if (!isa<FixedVectorType>(Op->getType())) diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp index 7e31ea77f4f5..6bad9d61238e 100644 --- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp @@ -404,6 +404,17 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { LoopPhi->getOperand(3).setReg(DecReg); } + SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. + if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { + // If the LoopEnd falls through, need to insert a t2B to the fall-through + // block so that the non-analyzable t2LoopEndDec doesn't fall through. + MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); + BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) + .addMBB(&*MBBI) + .add(predOps(ARMCC::AL)); + } + // Replace the loop dec and loop end as a single instruction. 
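// After the merge the loop block ends in something like (schematic):
//     t2LoopEndDec $lr, %loop.header   ; decrement LR, branch back if nonzero
//                                      ; -- opaque to analyzeBranch
//     t2B %fallthrough.block           ; inserted above, so correctness never
//                                      ; depends on an implicit fall-through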
MachineInstrBuilder MI = BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), @@ -1041,8 +1052,7 @@ bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { } bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); if (!STI.isThumb2() || !STI.hasLOB()) return false; diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index c7f451cba14f..d6d43b9143d6 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -312,8 +312,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { } bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) return false; diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 71a82a1e3271..df64710712cc 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. - unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { @@ -205,26 +205,38 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, return; } + bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); + for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); + if (Reg == FramePtr) + FramePtrSpillFI = FI; switch (Reg) { + case ARM::R11: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R8: case ARM::R9: case ARM::R10: - case ARM::R11: if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } LLVM_FALLTHROUGH; + case ARM::LR: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -232,18 +244,53 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } + MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; + if (HasFrameRecordArea) { + // Skip Frame Record setup: + // push {lr} + // mov lr, r11 + // push {lr} + std::advance(MBBI, 2); + FRPush = MBBI++; + } + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS1Push = MBBI; ++MBBI; } + // Find last push instruction for GPRCS2 - spilling of high registers + // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. + while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && + MBBI->getFlag(MachineInstr::FrameSetup)) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && + MBBI->getFlag(MachineInstr::FrameSetup)) { + GPRCS2Push = MBBI; + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. 
Reset MBBI back to the last PUSH of the prologue. + MBBI = OldMBBI; + break; + } + } + // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - + (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); + if (HasFrameRecordArea) + AFI->setFrameRecordSavedAreaSize(FRSize); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -252,71 +299,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - - if (adjustedGPRCS1Size) { - CFAOffset += adjustedGPRCS1Size; - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - } + CFAOffset += adjustedGPRCS1Size; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += - MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP) - .addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); + MachineBasicBlock::iterator AfterPush = + HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); + if (HasFrameRecordArea) { + // We have just finished pushing the previous FP into the stack, + // so simply capture the SP value as the new Frame Pointer. 
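// Together with the frame-record pushes skipped earlier, the prologue now
// opens with (schematic; FramePtr is a high register such as r11 here):
//     push {lr}        ; save the return address
//     mov  lr, r11     ; stage the caller's frame pointer...
//     push {lr}        ; ...and push it, completing the {FP, LR} frame record
//     mov  r11, sp     ; the tMOVr built below: new FP = address of the record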
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) + .addReg(ARM::SP) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } else { + FramePtrOffsetInBlock += + MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } + if(FramePtrOffsetInBlock) { - CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -326,45 +347,69 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; + // Emit call frame information for the callee-saved low registers. + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + if (adjustedGPRCS1Size) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } } } // Emit call frame information for the callee-saved high registers. 
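// As with the low registers just above, these .cfi_offset records are now
// anchored at std::next(GPRCS2Push), directly after the push that actually
// spills r8-r11, rather than at the current prologue cursor MBBI.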
- for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; + } } } @@ -453,21 +498,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); } -static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { - if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() && - isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)) - return true; - else if (MI.getOpcode() == ARM::tPOP) { - return true; - } else if (MI.getOpcode() == ARM::tMOVr) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && - ARM::hGPRRegClass.contains(Dst)); - } - return false; -} - void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); @@ -483,26 +513,26 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI.getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); Register FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize, ARM::NoRegister, - MachineInstr::NoFlags); + MachineInstr::FrameDestroy); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; - while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs)); - if (!isCSRestore(*MBBI, CSRegs)) + while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy)); + if (!MBBI->getFlag(MachineInstr::FrameDestroy)) ++MBBI; } // Move SP to start of FP callee save spill area. 
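// The new frame-record area now participates in the accounting, schematically:
//     NumBytes -= FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize
//                 + ArgRegsSaveSize
// where FRSize is the AFI->getFrameRecordSavedAreaSize() term added below.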
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (AFI->getFrameRecordSavedAreaSize() + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); @@ -516,14 +546,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, assert(!MFI.getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, - TII, *RegInfo); + TII, *RegInfo, MachineInstr::FrameDestroy); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } else { // For a large stack frame, we might need a scratch register to store // the size of the frame. We know all callee-save registers are free @@ -542,10 +574,10 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, - ScratchRegister, MachineInstr::NoFlags); + ScratchRegister, MachineInstr::FrameDestroy); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, - ScratchRegister, MachineInstr::NoFlags); + ScratchRegister, MachineInstr::FrameDestroy); } } @@ -637,7 +669,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) @@ -725,18 +758,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Define) .addReg(ARM::SP) .addImm(MBBI->getNumExplicitOperands() - 2) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Move from the temporary register to the LR. BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); // Advance past the pop instruction. MBBI++; // Increment the SP. emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4, ARM::NoRegister, - MachineInstr::NoFlags); + MachineInstr::FrameDestroy); return true; } @@ -746,7 +781,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { @@ -754,7 +790,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // perform the opposite conversion: tPOP_RET to tPOP. 
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && @@ -769,90 +806,82 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Erase the old instruction. MBB.erase(MBBI); MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); } assert(PopReg && "Do not know how to get LR"); BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) .add(predOps(ARMCC::AL)) - .addReg(PopReg, RegState::Define); + .addReg(PopReg, RegState::Define) + .setMIFlag(MachineInstr::FrameDestroy); emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, - ARM::NoRegister, MachineInstr::NoFlags); + ARM::NoRegister, MachineInstr::FrameDestroy); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); if (TemporaryReg) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); return true; } -using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>; - -// Return the first iteraror after CurrentReg which is present in EnabledRegs, -// or OrderEnd if no further registers are in that set. This does not advance -// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. -static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, - const ARMRegSet &EnabledRegs, - const unsigned *OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) - ++CurrentReg; - return CurrentReg; -} - -bool Thumb1FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); - - ARMRegSet LoRegsToSave; // r0-r7, lr - ARMRegSet HiRegsToSave; // r8-r11 - ARMRegSet CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. 
-
-  for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
-    Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+                                                     ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+                                                      ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+    ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+    ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+                                std::set<Register> &LowRegs,
+                                std::set<Register> &HighRegs) {
+  for (Register Reg : Regs) {
     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
-      LoRegsToSave[Reg] = true;
+      LowRegs.insert(Reg);
     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
-      HiRegsToSave[Reg] = true;
+      HighRegs.insert(Reg);
     } else {
       llvm_unreachable("callee-saved register of unexpected class");
     }
-
-    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
-        !MF.getRegInfo().isLiveIn(Reg) &&
-        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
-      CopyRegs[Reg] = true;
   }
+}
 
-  // Unused argument registers can be used for the high register saving.
-  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
-    if (!MF.getRegInfo().isLiveIn(ArgReg))
-      CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+                     const std::set<Register> &RegSet) {
+  return std::find_if(OrderedStartIt, OrderedEndIt,
+                      [&](Register Reg) { return RegSet.count(Reg); });
+}
 
-  // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const TargetInstrInfo &TII,
+                            const std::set<Register> &RegsToSave,
+                            const std::set<Register> &CopyRegs) {
+  MachineFunction &MF = *MBB.getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (!LoRegsToSave.none()) {
+  DebugLoc DL;
+
+  std::set<Register> LowRegs, HighRegs;
+  splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+  // Push low regs first
+  if (!LowRegs.empty()) {
     MachineInstrBuilder MIB =
         BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
-    for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
-      if (LoRegsToSave[Reg]) {
+    for (unsigned Reg : OrderedLowRegs) {
+      if (LowRegs.count(Reg)) {
         bool isKill = !MRI.isLiveIn(Reg);
         if (isKill && !MRI.isReserved(Reg))
           MBB.addLiveIn(Reg);
@@ -863,31 +892,26 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
     MIB.setMIFlags(MachineInstr::FrameSetup);
   }
 
-  // Push the high registers. There are no store instructions that can access
-  // these registers directly, so we have to move them to low registers, and
-  // push them. This might take multiple pushes, as it is possible for there to
+  // Now push the high registers.
+  // There are no store instructions that can access high registers directly,
+  // so we have to move them to low registers, and push them.
+  // This might take multiple pushes, as it is possible for there to
   // be fewer low registers available than high registers which need saving.
-  // These are in reverse order so that in the case where we need to use
+  // Find the first register to save.
+  // Registers must be processed in reverse order so that in case we need to use
   // multiple PUSH instructions, the order of the registers on the stack still
   // matches the unwind info. They need to be switched back to ascending order
   // before adding to the PUSH instruction.
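The getNextOrderedReg helper introduced above is just std::find_if over a canonical register order, filtered by set membership; walking a fixed order rather than iterating the std::set directly is what keeps the emitted MOV/PUSH sequence deterministic and consistent with the unwind info. A minimal standalone sketch of the same scan, using plain integers in place of LLVM's Register type (all names and the numeric register encoding here are illustrative, not part of the patch):

#include <algorithm>
#include <array>
#include <cstdio>
#include <set>

// Canonical high-register order used for the illustration (r8-r11).
static const std::array<unsigned, 4> HighOrder = {8, 9, 10, 11};

// Return the first iterator in [First, Last) naming a register in Wanted,
// or Last if none remains -- the same shape as getNextOrderedReg above.
template <typename It>
It nextOrdered(It First, It Last, const std::set<unsigned> &Wanted) {
  return std::find_if(First, Last,
                      [&](unsigned R) { return Wanted.count(R) != 0; });
}

int main() {
  std::set<unsigned> ToSave = {9, 11}; // r9 and r11 need saving
  // Walk in reverse so that, if several PUSHes are needed, the stack
  // layout still matches the unwind info.
  for (auto I = nextOrdered(HighOrder.rbegin(), HighOrder.rend(), ToSave);
       I != HighOrder.rend();
       I = nextOrdered(std::next(I), HighOrder.rend(), ToSave))
    std::printf("save r%u\n", *I); // prints: save r11, save r9
}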
- static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, - ARM::R5, ARM::R4, ARM::R3, - ARM::R2, ARM::R1, ARM::R0}; - static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; + auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), + OrderedHighRegs.rend(), + HighRegs); - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); - - // Find the first register to save. - const unsigned *HiRegToSave = findNextOrderedReg( - std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); - - while (HiRegToSave != AllHighRegsEnd) { + while (HiRegToSave != OrderedHighRegs.rend()) { // Find the first low register to use. - const unsigned *CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), + OrderedCopyRegs.rend(), + CopyRegs); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -895,25 +919,29 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( .setMIFlags(MachineInstr::FrameSetup); SmallVector<unsigned, 4> RegsToPush; - while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave[*HiRegToSave]) { + while (HiRegToSave != OrderedHighRegs.rend() && + CopyRegIt != OrderedCopyRegs.rend()) { + if (HighRegs.count(*HiRegToSave)) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyReg, RegState::Define) + .addReg(*CopyRegIt, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); + RegsToPush.push_back(*CopyRegIt); + + CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), + OrderedCopyRegs.rend(), + CopyRegs); + HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), + OrderedHighRegs.rend(), + HighRegs); } } @@ -924,84 +952,63 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } - - return true; } -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +static void popRegsFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MI, + const TargetInstrInfo &TII, + const std::set<Register> &RegsToRestore, + const std::set<Register> &AvailableCopyRegs, + bool IsVarArg, bool HasV5Ops) { + if (RegsToRestore.empty()) + return; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); - - bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - ARMRegSet LoRegsToRestore; - ARMRegSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers. 
-  ARMRegSet CopyRegs;
+  std::set<Register> LowRegs, HighRegs;
+  splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
 
-  for (CalleeSavedInfo I : CSI) {
-    Register Reg = I.getReg();
-
-    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
-      LoRegsToRestore[Reg] = true;
-    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
-      HiRegsToRestore[Reg] = true;
-    } else {
-      llvm_unreachable("callee-saved register of unexpected class");
-    }
-
-    // If this is a low register not used as the frame pointer, we may want to
-    // use it for restoring the high registers.
-    if ((ARM::tGPRRegClass.contains(Reg)) &&
-        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
-      CopyRegs[Reg] = true;
-  }
-
-  // If this is a return block, we may be able to use some unused return value
-  // registers for restoring the high regs.
-  auto Terminator = MBB.getFirstTerminator();
-  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
-    CopyRegs[ARM::R0] = true;
-    CopyRegs[ARM::R1] = true;
-    CopyRegs[ARM::R2] = true;
-    CopyRegs[ARM::R3] = true;
-    for (auto Op : Terminator->implicit_operands()) {
-      if (Op.isReg())
-        CopyRegs[Op.getReg()] = false;
-    }
-  }
-
-  static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-                                         ARM::R4, ARM::R5, ARM::R6, ARM::R7};
-  static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
-
-  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
-  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+  // Pop the high registers first.
+  // There are no load instructions that can access high registers directly,
+  // so we have to pop into low registers and then move to the high registers.
+  // This might take multiple pops, as it is possible for there to
+  // be fewer low registers available than high registers which need restoring.
 
   // Find the first register to restore.
-  auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
-                                           HiRegsToRestore, AllHighRegsEnd);
+  auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
+                                          OrderedHighRegs.end(),
+                                          HighRegs);
+
+  std::set<Register> CopyRegs = AvailableCopyRegs;
+  Register LowScratchReg;
+  if (!HighRegs.empty() && CopyRegs.empty()) {
+    // No copy regs are available to pop high regs. Let's make use of a return
+    // register and the scratch register (IP/R12) to copy things around.
+    LowScratchReg = ARM::R0;
+    BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+        .addReg(ARM::R12, RegState::Define)
+        .addReg(LowScratchReg, RegState::Kill)
+        .add(predOps(ARMCC::AL))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    CopyRegs.insert(LowScratchReg);
+  }
 
-  while (HiRegToRestore != AllHighRegsEnd) {
-    assert(!CopyRegs.none());
+  while (HiRegToRestore != OrderedHighRegs.end()) {
+    assert(!CopyRegs.empty());
     // Find the first low register to use.
-    auto CopyReg =
-        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+    auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
+                                     OrderedCopyRegs.end(),
+                                     CopyRegs);
 
     // Create the POP instruction.
-    MachineInstrBuilder PopMIB =
-        BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+    MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP))
+                                     .add(predOps(ARMCC::AL))
+                                     .setMIFlag(MachineInstr::FrameDestroy);
 
-    while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+    while (HiRegToRestore != OrderedHighRegs.end() &&
+           CopyReg != OrderedCopyRegs.end()) {
       // Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define); @@ -1009,64 +1016,189 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters( BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) .addReg(*HiRegToRestore, RegState::Define) .addReg(*CopyReg, RegState::Kill) - .add(predOps(ARMCC::AL)); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToRestore = - findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + CopyReg = getNextOrderedReg(std::next(CopyReg), + OrderedCopyRegs.end(), + CopyRegs); + HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), + OrderedHighRegs.end(), + HighRegs); } } - MachineInstrBuilder MIB = - BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); - - bool NeedsPop = false; - for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { - Register Reg = Info.getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - - if (Reg == ARM::LR) { - Info.setRestored(false); - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. - // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; + // Restore low register used as scratch if necessary + if (LowScratchReg.isValid()) { + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(LowScratchReg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + } - // CMSE entry functions must return via BXNS, see emitEpilogue. - if (AFI->isCmseNSEntryFunction()) + // Now pop the low registers + if (!LowRegs.empty()) { + MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + bool NeedsPop = false; + for (Register Reg : OrderedLowRegs) { + if (!LowRegs.count(Reg)) continue; - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); + if (Reg == ARM::LR) { + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (IsVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!HasV5Ops) + continue; + + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + + // Pop LR into PC. 
+          Reg = ARM::PC;
+          (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+          if (MI != MBB.end())
+            MIB.copyImplicitOps(*MI);
+          MI = MBB.erase(MI);
+        }
+        MIB.addReg(Reg, getDefRegState(true));
+        NeedsPop = true;
     }
-    MIB.addReg(Reg, getDefRegState(true));
-    NeedsPop = true;
+
+    // It's illegal to emit a pop instruction without operands.
+    if (NeedsPop)
+      MBB.insert(MI, &*MIB);
+    else
+      MF.deleteMachineInstr(MIB);
+  }
+}
+
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  Register FPReg = RegInfo->getFrameRegister(MF);
+
+  // In case FP is a high reg, we need a separate push sequence to generate
+  // a correct Frame Record
+  bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+  std::set<Register> FrameRecord;
+  std::set<Register> SpilledGPRs;
+  for (const CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
+      FrameRecord.insert(Reg);
+    else
+      SpilledGPRs.insert(Reg);
   }
 
-  // It's illegal to emit pop instruction without operands.
-  if (NeedsPop)
-    MBB.insert(MI, &*MIB);
-  else
-    MF.deleteMachineInstr(MIB);
+  pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
+
+  // Determine intermediate registers which can be used for pushing high regs:
+  // - Spilled low regs
+  // - Unused argument registers
+  std::set<Register> CopyRegs;
+  for (Register Reg : SpilledGPRs)
+    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+        !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
+      CopyRegs.insert(Reg);
+  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+    if (!MF.getRegInfo().isLiveIn(ArgReg))
+      CopyRegs.insert(ArgReg);
+
+  pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
+
+  return true;
+}
+
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
+  Register FPReg = RegInfo->getFrameRegister(MF);
+
+  // In case FP is a high reg, we need a separate pop sequence to generate
+  // a correct Frame Record
+  bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+  std::set<Register> FrameRecord;
+  std::set<Register> SpilledGPRs;
+  for (CalleeSavedInfo &I : CSI) {
+    Register Reg = I.getReg();
+    if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
+      FrameRecord.insert(Reg);
+    else
+      SpilledGPRs.insert(Reg);
+
+    if (Reg == ARM::LR)
+      I.setRestored(false);
+  }
+
+  // Determine intermediate registers which can be used for popping high regs:
+  // - Spilled low regs
+  // - Unused return registers
+  std::set<Register> CopyRegs;
+  std::set<Register> UnusedReturnRegs;
+  for (Register Reg : SpilledGPRs)
+    if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
+      CopyRegs.insert(Reg);
+  auto Terminator =
MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + UnusedReturnRegs.insert(ARM::R0); + UnusedReturnRegs.insert(ARM::R1); + UnusedReturnRegs.insert(ARM::R2); + UnusedReturnRegs.insert(ARM::R3); + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + UnusedReturnRegs.erase(Op.getReg()); + } + } + CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); + + // First pop regular spilled regs. + popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, + STI.hasV5TOps()); + + // LR may only be popped into pc, as part of a return sequence. + // Check that no other pop instructions are inserted after that. + assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && + "Can't insert pop after return sequence"); + + // Now pop Frame Record regs. + // Only unused return registers can be used as copy regs at this point. + popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, + STI.hasV5TOps()); return true; } diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index 5cdaa7f02201..155555152ced 100644 --- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -226,9 +226,10 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) { ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; - // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it + // IT blocks are limited to one conditional op if -arm-restrict-it // is set: skip the loop if (!restrictIT) { + LLVM_DEBUG(dbgs() << "Allowing complex IT block\n";); // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && @@ -283,8 +284,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) { } bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); if (!STI.isThumb2()) return false; AFI = Fn.getInfo<ARMFunctionInfo>(); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index ebd139af2219..60dbc7b92013 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -555,7 +555,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. - do MI.RemoveOperand(FrameRegIdx+1); + do MI.removeOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); MIB.add(predOps(ARMCC::AL)); @@ -592,7 +592,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); // Remove the cc_out operand. 
if (HasCCOut) - MI.RemoveOperand(MI.getNumOperands()-1); + MI.removeOperand(MI.getNumOperands()-1); Offset = 0; return true; } @@ -626,7 +626,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } - MI.RemoveOperand(FrameRegIdx+1); + MI.removeOperand(FrameRegIdx+1); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0); NewOpc = immediateOffsetOpcode(Opcode); AddrMode = ARMII::AddrModeT2_i12; diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 1cc5422523f1..7ae4b19afb60 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -205,11 +206,11 @@ namespace { bool IsSelfLoop); /// ReduceMI - Attempt to reduce MI, return true on success. - bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, - bool LiveCPSR, bool IsSelfLoop); + bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR, + bool IsSelfLoop, bool SkipPrologueEpilogue); /// ReduceMBB - Reduce width of instructions in the specified basic block. - bool ReduceMBB(MachineBasicBlock &MBB); + bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue); bool OptimizeSize; bool MinimizeSize; @@ -620,7 +621,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -668,7 +669,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -848,7 +849,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -971,7 +972,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); - LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI + LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase_instr(MI); @@ -1012,11 +1013,15 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { } bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, - bool LiveCPSR, bool IsSelfLoop) { + bool LiveCPSR, bool IsSelfLoop, + bool SkipPrologueEpilogue) { unsigned Opcode = MI->getOpcode(); DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); if (OPI == ReduceOpcodeMap.end()) return false; + if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) || + MI->getFlag(MachineInstr::FrameDestroy))) + return false; const ReduceEntry &Entry = ReduceTable[OPI->second]; // Don't attempt normal reductions on "special" cases for now. 
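The SkipPrologueEpilogue guard added to ReduceMI above relies on instructions being tagged with the FrameSetup/FrameDestroy flags, which is exactly what the Thumb1FrameLowering changes earlier in this patch arrange; when Windows CFI is in play, narrowing such an instruction would desynchronize the unwind directives from the emitted opcodes. A self-contained sketch of the same flag-filtering pattern, with a toy instruction type standing in for MachineInstr (the type and flag values are invented for illustration):

#include <cstdint>

// Toy stand-ins for MachineInstr and its flag bits (illustrative only).
struct Instr {
  enum MIFlag : uint16_t { FrameSetup = 1 << 0, FrameDestroy = 1 << 1 };
  uint16_t Flags = 0;
  bool getFlag(MIFlag F) const { return (Flags & F) != 0; }
};

// Mirrors the guard in ReduceMI: refuse to narrow prologue/epilogue
// instructions when the caller asked for them to be preserved.
static bool mayReduce(const Instr &MI, bool SkipPrologueEpilogue) {
  if (SkipPrologueEpilogue && (MI.getFlag(Instr::FrameSetup) ||
                               MI.getFlag(Instr::FrameDestroy)))
    return false;
  return true; // the normal reduction table lookup would continue here
}

int main() {
  Instr Push;
  Push.Flags = Instr::FrameSetup;
  return mayReduce(Push, /*SkipPrologueEpilogue=*/true) ? 1 : 0; // returns 0
}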
@@ -1036,7 +1041,8 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, return false; } -bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { +bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB, + bool SkipPrologueEpilogue) { bool Modified = false; // Yes, CPSR could be livein. @@ -1080,7 +1086,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Does NextMII belong to the same bundle as MI? bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); - if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { + if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) { Modified = true; MachineBasicBlock::instr_iterator I = std::prev(NextMII); MI = &*I; @@ -1130,7 +1136,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { if (PredicateFtor && !PredicateFtor(MF.getFunction())) return false; - STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); + STI = &MF.getSubtarget<ARMSubtarget>(); if (STI->isThumb1Only() || STI->prefers32BitThumb()) return false; @@ -1147,8 +1153,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { // predecessors. ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); bool Modified = false; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); for (MachineBasicBlock *MBB : RPOT) - Modified |= ReduceMBB(*MBB); + Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI); return Modified; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 5d2bc4ebe191..2a3fa3b31512 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -37,7 +37,7 @@ extern cl::opt<bool> ReuseFrameIndexVals; using namespace llvm; -ThumbRegisterInfo::ThumbRegisterInfo() {} +ThumbRegisterInfo::ThumbRegisterInfo() = default; const TargetRegisterClass * ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, @@ -338,7 +338,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, static void removeOperands(MachineInstr &MI, unsigned i) { unsigned Op = i; for (unsigned e = MI.getNumOperands(); i != e; ++i) - MI.RemoveOperand(Op); + MI.removeOperand(Op); } /// convertToNonSPOpcode - Change the opcode to the non-SP version, because @@ -361,6 +361,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() && "This isn't needed for thumb2!"); DebugLoc dl = MI.getDebugLoc(); @@ -396,7 +397,18 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with the frame register (e.g., sp). - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + Register DestReg = FrameReg; + + // In case FrameReg is a high register, move it to a low reg to ensure it + // can be used as an operand. 
+    if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) {
+      DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg)
+          .addReg(FrameReg)
+          .add(predOps(ARMCC::AL));
+    }
+
+    MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false);
     ImmOp.ChangeToImmediate(ImmedOffset);
 
     // If we're using a register where sp was stored, convert the instruction
@@ -517,7 +529,16 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                Offset, false, TII, *this);
     else {
       emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
-      UseRR = true;
+      if (!ARM::hGPRRegClass.contains(FrameReg)) {
+        UseRR = true;
+      } else {
+        // If FrameReg is a high register, add the reg values in a separate
+        // instruction as the load won't be able to access it.
+        BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg)
+            .addReg(TmpReg)
+            .addReg(FrameReg)
+            .add(predOps(ARMCC::AL));
+      }
     }
   } else {
     emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
@@ -526,11 +547,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
     MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
     MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
-    if (UseRR)
+    if (UseRR) {
+      assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+             "Thumb1 loads can't use high register");
       // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
       // register. The offset is already handled in the vreg value.
       MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
                                                      false);
+    }
   } else if (MI.mayStore()) {
       VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
       bool UseRR = false;
@@ -541,18 +565,30 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                  Offset, false, TII, *this);
       else {
         emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
-        UseRR = true;
+        if (!ARM::hGPRRegClass.contains(FrameReg)) {
+          UseRR = true;
+        } else {
+          // If FrameReg is a high register, add the reg values in a separate
+          // instruction as the store won't be able to access it.
+          BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg)
+              .addReg(VReg)
+              .addReg(FrameReg)
+              .add(predOps(ARMCC::AL));
+        }
       }
     } else
       emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
                                 *this);
 
     MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
     MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
-    if (UseRR)
+    if (UseRR) {
+      assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+             "Thumb1 stores can't use high register");
       // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
       // register. The offset is already handled in the vreg value.
       MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
                                                      false);
+    }
   } else {
     llvm_unreachable("Unexpected opcode!");
   }
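Both ThumbRegisterInfo changes above apply one rule: Thumb1 load/store encodings only accept a low base register (or SP), so a high frame register such as r11 must first be funneled through a low register, either with a plain tMOVr copy or by folding the base into the offset register with tADDhirr. A rough standalone model of that decision, printing the sequence it would pick; the hard-coded r4 scratch and the printf-based emission are illustrative only (the real code allocates a virtual tGPR register instead):

#include <cstdio>
#include <string>

// Toy model of the rewrite: Thumb1 loads/stores can only address memory
// through a low register (r0-r7) or SP, so a high frame register such as
// r11 must first be copied into a low scratch register.
static bool isHighReg(const std::string &R) {
  return R == "r8" || R == "r9" || R == "r10" || R == "r11" || R == "r12";
}

static void emitFrameLoad(const std::string &FrameReg, int Offset) {
  if (isHighReg(FrameReg)) {
    std::printf("  mov r4, %s\n", FrameReg.c_str()); // low scratch copy
    std::printf("  ldr r0, [r4, #%d]\n", Offset);
  } else {
    std::printf("  ldr r0, [%s, #%d]\n", FrameReg.c_str(), Offset);
  }
}

int main() {
  emitFrameLoad("r7", 8);  // low FP: direct addressing is fine
  emitFrameLoad("r11", 8); // high FP: needs the extra tMOVr
}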