Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
78 files changed, 6133 insertions, 3073 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp index 7a1865ce5fd6..9228cc2d7a9c 100644 --- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -68,34 +68,31 @@ namespace { // unsigned createDupLane(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Reg, unsigned Lane, - bool QPR=false); + const DebugLoc &DL, unsigned Reg, unsigned Lane, + bool QPR = false); unsigned createExtractSubreg(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned DReg, unsigned Lane, - const TargetRegisterClass *TRC); + const DebugLoc &DL, unsigned DReg, + unsigned Lane, const TargetRegisterClass *TRC); unsigned createVExt(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Ssub0, unsigned Ssub1); + const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1); unsigned createRegSequence(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Reg1, unsigned Reg2); + const DebugLoc &DL, unsigned Reg1, + unsigned Reg2); unsigned createInsertSubreg(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, unsigned DReg, unsigned Lane, - unsigned ToInsert); + const DebugLoc &DL, unsigned DReg, + unsigned Lane, unsigned ToInsert); unsigned createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL); + const DebugLoc &DL); // // Various property checkers @@ -426,11 +423,10 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { } // Creates a DPR register from an SPR one by using a VDUP. -unsigned -A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Reg, unsigned Lane, bool QPR) { +unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, unsigned Reg, + unsigned Lane, bool QPR) { unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass); AddDefaultPred(BuildMI(MBB, @@ -445,12 +441,10 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, } // Creates a SPR register from a DPR by copying the value in lane 0. -unsigned -A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned DReg, unsigned Lane, - const TargetRegisterClass *TRC) { +unsigned A15SDOptimizer::createExtractSubreg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { unsigned Out = MRI->createVirtualRegister(TRC); BuildMI(MBB, InsertBefore, @@ -462,11 +456,9 @@ A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, } // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. 
-unsigned -A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Reg1, unsigned Reg2) { +unsigned A15SDOptimizer::createRegSequence( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, unsigned Reg1, unsigned Reg2) { unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); BuildMI(MBB, InsertBefore, @@ -481,11 +473,10 @@ A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) // and merges them into one DPR register. -unsigned -A15SDOptimizer::createVExt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, - unsigned Ssub0, unsigned Ssub1) { +unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, unsigned Ssub0, + unsigned Ssub1) { unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); AddDefaultPred(BuildMI(MBB, InsertBefore, @@ -497,11 +488,9 @@ A15SDOptimizer::createVExt(MachineBasicBlock &MBB, return Out; } -unsigned -A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - DebugLoc DL, unsigned DReg, unsigned Lane, - unsigned ToInsert) { +unsigned A15SDOptimizer::createInsertSubreg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) { unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); BuildMI(MBB, InsertBefore, @@ -517,7 +506,7 @@ A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, unsigned A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, - DebugLoc DL) { + const DebugLoc &DL) { unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); BuildMI(MBB, InsertBefore, @@ -681,6 +670,9 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(*Fn.getFunction())) + return false; + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be // enabled when NEON is available. 
@@ -701,7 +693,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); MI != ME;) { - Modified |= runOnInstruction(MI++); + Modified |= runOnInstruction(&*MI++); } } diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index cd7540e52410..690ff86a0c86 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -27,6 +27,7 @@ class FunctionPass; class ImmutablePass; class MachineInstr; class MCInst; +class PassRegistry; class TargetLowering; class TargetMachine; @@ -45,6 +46,9 @@ FunctionPass *createThumb2SizeReductionPass( void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); +void initializeARMLoadStoreOptPass(PassRegistry &); +void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); + } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index c171656b48ab..ef626b66a1e7 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -76,6 +76,11 @@ def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb / dsb) instructions">; +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; +def FeatureAcquireRelease : SubtargetFeature<"acquire-release", + "HasAcquireRelease", "true", + "Has v8 acquire/release (lda/ldaex etc) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", @@ -84,17 +89,98 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance Monitor extensions">; def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone security extensions">; +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M Security Extensions">; def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable support for Cryptography extensions", [FeatureNEON]>; def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +// Not to be confused with FeatureHasRetAddrStack (return address stack) +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability and Serviceability extensions">; + // Cyclone has preferred instructions for zeroing VFP registers, which can // execute in 0 cycles. def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; +// Whether or not it may be profitable to unpredicate certain instructions +// during if conversion. +def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", + "IsProfitableToUnpredicate", + "true", + "Is profitable to unpredicate">; + +// Some targets (e.g. Swift) have microcoded VGETLNi32. +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; + +// Some targets (e.g. Swift) have microcoded VDUP32. +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", + "Has slow VDUP32 - prefer VMOV">; + +// Some targets (e.g. 
Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON +// for scalar FP, as this allows more effective execution domain optimization. +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; + +// Swift has ISHST barriers compatible with Atomic Release semantics but weaker +// than ISH +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", + "true", "Prefer ISHST barriers">; + +// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", + "Has muxed AGU and NEON/FPU">; + +// On some targets, a VLDM/VSTM starting with an odd register number needs more +// microops than single VLDRS. +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", + "true", "VLDM/VSTM starting with an odd register is slow">; + +// Some targets have a renaming dependency when loading into D subregisters. +def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", + "SlowLoadDSubregister", "true", + "Loading into D subregs is slow">; +// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", + "DontWidenVMOVS", "true", + "Don't widen VMOVS to VMOVD">; + +// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; + +// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", + "true", "Has VMLx hazards">; + +// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from +// VFP to NEON, as an execution domain optimization. +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", + "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">; + +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. This affects instruction selection and should +// only be enabled if the handling of denormals is not important. +def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + +// On some processors, VLDn instructions that access unaligned data take one +// extra cycle. Take that into account when computing operand latencies. +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", + "true", + "Check for VLDn unaligned access">; + +// Some processors have a nonpipelined VFP coprocessor. +def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", + "NonpipelinedVFP", "true", + "VFP instructions are not pipelined">; + // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. @@ -106,12 +192,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", "HasVMLxForwarding", "true", "Has multiplier accumulator forwarding">; -// Some processors benefit from using NEON instructions for scalar -// single-precision FP operations. -def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; - // Disable 32-bit to 16-bit narrowing for experimentation. 
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; @@ -130,7 +210,7 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. -def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true", "Has return address stack">; /// DSP extension. @@ -200,24 +280,31 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; +def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", + "Support ARM v8M Baseline instructions", + [HasV6MOps]>; def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", "Support ARM v6k instructions", [HasV6Ops]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6MOps, HasV6KOps, FeatureThumb2]>; + [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", - [HasV6T2Ops, FeaturePerfMon]>; + [HasV6T2Ops, FeaturePerfMon, + FeatureV7Clrex]>; def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", - [HasV7Ops]>; + [HasV7Ops, FeatureAcquireRelease]>; def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops]>; def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps]>; +def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; //===----------------------------------------------------------------------===// @@ -238,6 +325,8 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", []>; def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", "Cortex-A17 ARM processors", []>; +def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", + "Cortex-A32 ARM processors", []>; def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", []>; def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", @@ -246,6 +335,8 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", "Cortex-A57 ARM processors", []>; def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", []>; +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", []>; def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", "Qualcomm ARM processors", []>; @@ -256,12 +347,14 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M1 processors", []>; def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", []>; + "Cortex-R4 ARM processors", []>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", []>; def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", "Cortex-R7 ARM processors", []>; +def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", + "Cortex-M3 ARM processors", []>; //===----------------------------------------------------------------------===// // ARM schedules. 
@@ -374,7 +467,27 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, FeatureMP, FeatureVirtualization, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureRAS]>; + +def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", + [HasV8MBaselineOps, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + FeatureV7Clrex, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass]>; + +def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", + [HasV8MMainlineOps, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass]>; // Aliases def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; @@ -452,7 +565,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, // FIXME: A5 has currently the same Schedule model as A8 def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureTrustZone, FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, @@ -462,9 +575,10 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, FeatureVFP4]>; def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureTrustZone, FeatureSlowFPBrcc, + FeatureHasVMLxHazards, FeatureHasSlowFPVMLx, FeatureVMLxForwarding, FeatureT2XtPk, @@ -475,25 +589,33 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, FeatureVirtualization]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, - FeatureHasRAS, + FeatureHasRetAddrStack, + FeatureNonpipelinedVFP, FeatureTrustZone, FeatureSlowFPBrcc, + FeatureHasVMLxHazards, FeatureHasSlowFPVMLx, FeatureVMLxForwarding, FeatureT2XtPk]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureTrustZone, + FeatureHasVMLxHazards, FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR, + FeatureExpandMLx, + FeaturePreferVMOVSR, + FeatureMuxedUnits, + FeatureNEONForFPMovs, + FeatureCheckVLDnAlign, FeatureMP]>; // FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureTrustZone, FeatureVMLxForwarding, FeatureT2XtPk, @@ -506,11 +628,14 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, // FIXME: A15 has currently the same Schedule model as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, - FeatureHasRAS, + FeatureDontWidenVMOVS, + FeatureHasRetAddrStack, + FeatureMuxedUnits, FeatureTrustZone, FeatureT2XtPk, FeatureVFP4, FeatureMP, + FeatureCheckVLDnAlign, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, @@ -518,7 +643,7 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, // FIXME: A17 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureTrustZone, FeatureMP, FeatureVMLxForwarding, @@ -533,7 +658,9 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, // FIXME: krait has currently the same features as A9 plus VFP4 and hardware // division features. 
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, - FeatureHasRAS, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureCheckVLDnAlign, FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, @@ -543,7 +670,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureHWDivARM]>; def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, @@ -552,17 +679,24 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, + FeatureHasVMLxHazards, + FeatureProfUnpredicate, + FeaturePrefISHSTBarrier, + FeatureSlowOddRegister, + FeatureSlowLoadDSubreg, + FeatureSlowVGETLNi32, + FeatureSlowVDUP32]>; // FIXME: R4 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureAvoidPartialCPSR, FeatureT2XtPk]>; // FIXME: R4F has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, FeatureVFP3, @@ -572,7 +706,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureVFP3, FeatureD16, FeatureSlowFPBrcc, @@ -583,9 +717,20 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, - FeatureHasRAS, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; + +def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, + FeatureHasRetAddrStack, FeatureVFP3, - FeatureVFPOnlySP, FeatureD16, FeatureFP16, FeatureMP, @@ -595,8 +740,8 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureAvoidPartialCPSR, FeatureT2XtPk]>; -def : ProcNoItin<"cortex-m3", [ARMv7m]>; -def : ProcNoItin<"sc300", [ARMv7m]>; +def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>; +def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>; def : ProcNoItin<"cortex-m4", [ARMv7em, FeatureVFP4, @@ -607,6 +752,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em, FeatureFPARMv8, FeatureD16]>; +def : ProcNoItin<"cortex-a32", [ARMv8a, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, FeatureHWDiv, @@ -636,9 +787,16 @@ def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; + // Cyclone is very similar to swift def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, - FeatureHasRAS, + FeatureHasRetAddrStack, FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 206db9619a2f..04863a7ecf8f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -43,12 +43,11 @@ #include "llvm/MC/MCStreamer.h" #include 
"llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -213,8 +212,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, GetARMGVSymbol(GV, TF)->print(O, MAI); printOffset(MO.getOffset(), O); - if (TF == ARMII::MO_PLT) - O << "(PLT)"; break; } case MachineOperand::MO_ConstantPoolIndex: @@ -516,9 +513,10 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer->AddBlankLine(); } - Stubs = MMIMacho.GetHiddenGVStubList(); + Stubs = MMIMacho.GetThreadLocalGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection()); EmitAlignment(2); for (auto &Stub : Stubs) @@ -536,18 +534,48 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + if (TT.isOSBinFormatCOFF()) { + const auto &TLOF = + static_cast<const TargetLoweringObjectFileCOFF &>(getObjFileLowering()); + + std::string Flags; + raw_string_ostream OS(Flags); + + for (const auto &Function : M) + TLOF.emitLinkerFlagsForGlobal(OS, &Function, *Mang); + for (const auto &Global : M.globals()) + TLOF.emitLinkerFlagsForGlobal(OS, &Global, *Mang); + for (const auto &Alias : M.aliases()) + TLOF.emitLinkerFlagsForGlobal(OS, &Alias, *Mang); + + OS.flush(); + + // Output collected flags + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + } + // The last attribute to be emitted is ABI_optimization_goals MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); if (OptimizationGoals > 0 && - (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI())) + (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || + Subtarget->isTargetMuslAEABI())) ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); OptimizationGoals = -1; ATS.finishAttributeSection(); } +static bool isV8M(const ARMSubtarget *Subtarget) { + // Note that v8M Baseline is a subset of v6T2! 
+ return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) || + Subtarget->hasV8MMainlineOps(); +} + //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -561,13 +589,17 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, return ARMBuildAttrs::v5TEJ; if (Subtarget->hasV8Ops()) - return ARMBuildAttrs::v8; + return ARMBuildAttrs::v8_A; + else if (Subtarget->hasV8MMainlineOps()) + return ARMBuildAttrs::v8_M_Main; else if (Subtarget->hasV7Ops()) { if (Subtarget->isMClass() && Subtarget->hasDSP()) return ARMBuildAttrs::v7E_M; return ARMBuildAttrs::v7; } else if (Subtarget->hasV6T2Ops()) return ARMBuildAttrs::v6T2; + else if (Subtarget->hasV8MBaselineOps()) + return ARMBuildAttrs::v8_M_Base; else if (Subtarget->hasV6MOps()) return ARMBuildAttrs::v6S_M; else if (Subtarget->hasV6Ops()) @@ -609,9 +641,9 @@ void ARMAsmPrinter::emitAttributes() { static_cast<const ARMBaseTargetMachine &>(TM); const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); - std::string CPUString = STI.getCPUString(); + const std::string &CPUString = STI.getCPUString(); - if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic" + if (!StringRef(CPUString).startswith("generic")) { // FIXME: remove krait check when GNU tools support krait cpu if (STI.isKrait()) { ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); @@ -627,7 +659,7 @@ void ARMAsmPrinter::emitAttributes() { // Tag_CPU_arch_profile must have the default value of 0 when "Architecture // profile is not applicable (e.g. pre v7, or cross-profile code)". - if (STI.hasV7Ops()) { + if (STI.hasV7Ops() || isV8M(&STI)) { if (STI.isAClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, ARMBuildAttrs::ApplicationProfile); @@ -643,7 +675,10 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasARMOps() ? ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed); - if (STI.isThumb1Only()) { + if (isV8M(&STI)) { + ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumbDerived); + } else if (STI.isThumb1Only()) { ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (STI.hasThumb2()) { ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, @@ -690,7 +725,7 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitFPU(ARM::FK_VFPV2); } - if (TM.getRelocationModel() == Reloc::PIC_) { + if (isPositionIndependent()) { // PIC specific attributes. 
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data, ARMBuildAttrs::AddressRWPCRel); @@ -794,6 +829,9 @@ void ARMAsmPrinter::emitAttributes() { if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (STI.hasDSP() && isV8M(&STI)) + ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); + if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width @@ -853,11 +891,18 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber, static MCSymbolRefExpr::VariantKind getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { switch (Modifier) { - case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None; - case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; - case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF; - case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF; - case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL; + case ARMCP::no_modifier: + return MCSymbolRefExpr::VK_None; + case ARMCP::TLSGD: + return MCSymbolRefExpr::VK_TLSGD; + case ARMCP::TPOFF: + return MCSymbolRefExpr::VK_TPOFF; + case ARMCP::GOTTPOFF: + return MCSymbolRefExpr::VK_GOTTPOFF; + case ARMCP::GOT_PREL: + return MCSymbolRefExpr::VK_ARM_GOT_PREL; + case ARMCP::SECREL: + return MCSymbolRefExpr::VK_SECREL; } llvm_unreachable("Invalid ARMCPModifier!"); } @@ -865,8 +910,8 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags) { if (Subtarget->isTargetMachO()) { - bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) && - Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); + bool IsIndirect = + (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->isGVIndirectSymbol(GV); if (!IsIndirect) return getSymbol(GV); @@ -876,8 +921,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) - : MMIMachO.getGVStubEntry(MCSym); + GV->isThreadLocal() ? MMIMachO.getThreadLocalGVStubEntry(MCSym) + : MMIMachO.getGVStubEntry(MCSym); + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); @@ -991,7 +1037,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { // .word (LBB1 - LJTI_0_0) const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); - if (TM.getRelocationModel() == Reloc::PIC_) + if (isPositionIndependent()) Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol, OutContext), OutContext); @@ -1227,6 +1273,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { const DataLayout &DL = getDataLayout(); + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1643,29 +1691,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. 
if (!Subtarget->isTargetMachO()) { - //.long 0xe7ffdefe @ trap uint32_t Val = 0xe7ffdefeUL; OutStreamer->AddComment("trap"); - OutStreamer->EmitIntValue(Val, 4); + ATS.emitInst(Val); return; } break; } case ARM::TRAPNaCl: { - //.long 0xe7fedef0 @ trap uint32_t Val = 0xe7fedef0UL; OutStreamer->AddComment("trap"); - OutStreamer->EmitIntValue(Val, 4); + ATS.emitInst(Val); return; } case ARM::tTRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. if (!Subtarget->isTargetMachO()) { - //.short 57086 @ trap uint16_t Val = 0xdefe; OutStreamer->AddComment("trap"); - OutStreamer->EmitIntValue(Val, 2); + ATS.emitInst(Val, 'n'); return; } break; @@ -1845,6 +1890,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ScratchReg) .addReg(SrcReg) @@ -1885,6 +1931,36 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0)); return; } + case ARM::tInt_WIN_eh_sjlj_longjmp: { + // ldr.w r11, [$src, #0] + // ldr.w sp, [$src, #8] + // ldr.w pc, [$src, #4] + + unsigned SrcReg = MI->getOperand(0).getReg(); + + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12) + .addReg(ARM::R11) + .addReg(SrcReg) + .addImm(0) + // Predicate + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12) + .addReg(ARM::SP) + .addReg(SrcReg) + .addImm(8) + // Predicate + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12) + .addReg(ARM::PC) + .addReg(SrcReg) + .addImm(4) + // Predicate + .addImm(ARMCC::AL) + .addReg(0)); + return; + } } MCInst TmpInst; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index ed7be2de51ca..97f5ca0ecbc2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -95,6 +95,7 @@ public: bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); private: + // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() void emitAttributes(); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 49f328852667..693f16499717 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -51,15 +51,6 @@ static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); -static cl::opt<bool> -WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), - cl::desc("Widen ARM vmovs to vmovd when possible")); - -static cl::opt<unsigned> -SwiftPartialUpdateClearance("swift-partial-update-clearance", - cl::Hidden, cl::init(12), - cl::desc("Clearance before partial register updates")); - /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -124,18 +115,15 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } -MachineInstr * -ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const { +MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( + MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { // FIXME: Thumb2 support. 
if (!EnableARM3Addr) return nullptr; - MachineInstr *MI = MBBI; - MachineFunction &MF = *MI->getParent()->getParent(); - uint64_t TSFlags = MI->getDesc().TSFlags; + MachineFunction &MF = *MI.getParent()->getParent(); + uint64_t TSFlags = MI.getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { default: return nullptr; @@ -148,24 +136,24 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Try splitting an indexed load/store to an un-indexed one plus an add/sub // operation. - unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); + unsigned MemOpc = getUnindexedOpcode(MI.getOpcode()); if (MemOpc == 0) return nullptr; MachineInstr *UpdateMI = nullptr; MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); unsigned NumOps = MCID.getNumOperands(); - bool isLoad = !MI->mayStore(); - const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); - const MachineOperand &Base = MI->getOperand(2); - const MachineOperand &Offset = MI->getOperand(NumOps-3); + bool isLoad = !MI.mayStore(); + const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0); + const MachineOperand &Base = MI.getOperand(2); + const MachineOperand &Offset = MI.getOperand(NumOps - 3); unsigned WBReg = WB.getReg(); unsigned BaseReg = Base.getReg(); unsigned OffReg = Offset.getReg(); - unsigned OffImm = MI->getOperand(NumOps-2).getImm(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); + unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); switch (AddrMode) { default: llvm_unreachable("Unknown indexed op!"); case ARMII::AddrMode2: { @@ -176,22 +164,33 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! return nullptr; - UpdateMI = BuildMI(MF, MI->getDebugLoc(), + UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) - .addReg(BaseReg).addImm(Amt) - .addImm(Pred).addReg(0).addReg(0); + .addReg(BaseReg) + .addImm(Amt) + .addImm(Pred) + .addReg(0) + .addReg(0); } else if (Amt != 0) { ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); - UpdateMI = BuildMI(MF, MI->getDebugLoc(), + UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) - .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) - .addImm(Pred).addReg(0).addReg(0); + .addReg(BaseReg) + .addReg(OffReg) + .addReg(0) + .addImm(SOOpc) + .addImm(Pred) + .addReg(0) + .addReg(0); } else - UpdateMI = BuildMI(MF, MI->getDebugLoc(), + UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) - .addReg(BaseReg).addReg(OffReg) - .addImm(Pred).addReg(0).addReg(0); + .addReg(BaseReg) + .addReg(OffReg) + .addImm(Pred) + .addReg(0) + .addReg(0); break; } case ARMII::AddrMode3 : { @@ -199,15 +198,21 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Amt = ARM_AM::getAM3Offset(OffImm); if (OffReg == 0) // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. - UpdateMI = BuildMI(MF, MI->getDebugLoc(), + UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? 
ARM::SUBri : ARM::ADDri), WBReg) - .addReg(BaseReg).addImm(Amt) - .addImm(Pred).addReg(0).addReg(0); + .addReg(BaseReg) + .addImm(Amt) + .addImm(Pred) + .addReg(0) + .addReg(0); else - UpdateMI = BuildMI(MF, MI->getDebugLoc(), + UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) - .addReg(BaseReg).addReg(OffReg) - .addImm(Pred).addReg(0).addReg(0); + .addReg(BaseReg) + .addReg(OffReg) + .addImm(Pred) + .addReg(0) + .addReg(0); break; } } @@ -215,24 +220,34 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, std::vector<MachineInstr*> NewMIs; if (isPre) { if (isLoad) - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc), MI->getOperand(0).getReg()) - .addReg(WBReg).addImm(0).addImm(Pred); + MemMI = + BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) + .addReg(WBReg) + .addImm(0) + .addImm(Pred); else - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc)).addReg(MI->getOperand(1).getReg()) - .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) + .addReg(MI.getOperand(1).getReg()) + .addReg(WBReg) + .addReg(0) + .addImm(0) + .addImm(Pred); NewMIs.push_back(MemMI); NewMIs.push_back(UpdateMI); } else { if (isLoad) - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc), MI->getOperand(0).getReg()) - .addReg(BaseReg).addImm(0).addImm(Pred); + MemMI = + BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) + .addReg(BaseReg) + .addImm(0) + .addImm(Pred); else - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc)).addReg(MI->getOperand(1).getReg()) - .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) + .addReg(MI.getOperand(1).getReg()) + .addReg(BaseReg) + .addReg(0) + .addImm(0) + .addImm(Pred); if (WB.isDead()) UpdateMI->getOperand(0).setIsDead(); NewMIs.push_back(UpdateMI); @@ -241,8 +256,8 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Transfer LiveVariables states, kill / dead info. if (LV) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { unsigned Reg = MO.getReg(); @@ -250,7 +265,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (MO.isDef()) { MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; if (MO.isDead()) - LV->addVirtualRegisterDead(Reg, NewMI); + LV->addVirtualRegisterDead(Reg, *NewMI); } if (MO.isUse() && MO.isKill()) { for (unsigned j = 0; j < 2; ++j) { @@ -258,7 +273,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *NewMI = NewMIs[j]; if (!NewMI->readsRegister(Reg)) continue; - LV->addVirtualRegisterKilled(Reg, NewMI); + LV->addVirtualRegisterKilled(Reg, *NewMI); if (VI.removeKill(MI)) VI.Kills.push_back(NewMI); break; @@ -268,17 +283,18 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } } + MachineBasicBlock::iterator MBBI = MI.getIterator(); MFI->insert(MBBI, NewMIs[1]); MFI->insert(MBBI, NewMIs[0]); return NewMIs[0]; } // Branch analysis. 
-bool -ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { TBB = nullptr; FBB = nullptr; @@ -289,7 +305,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Walk backwards from the end of the basic block until the branch is // analyzed or we give up. - while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) { + while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { // Flag to be raised on unanalyzeable instructions. This is useful in cases // where we want to clean up on the end of the basic block before we bail @@ -322,7 +338,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(I->getOperand(2)); } else if (I->isReturn()) { // Returns can't be analyzed, but we should run cleanup. - CantAnalyze = !isPredicated(I); + CantAnalyze = !isPredicated(*I); } else { // We encountered other unrecognized terminator. Bail out immediately. return true; @@ -330,7 +346,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Cleanup code - to be run for unpredicated unconditional branches and // returns. - if (!isPredicated(I) && + if (!isPredicated(*I) && (isUncondBranchOpcode(I->getOpcode()) || isIndirectBranchOpcode(I->getOpcode()) || isJumpTableBranchOpcode(I->getOpcode()) || @@ -344,9 +360,9 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, if (AllowModify) { MachineBasicBlock::iterator DI = std::next(I); while (DI != MBB.end()) { - MachineInstr *InstToDelete = DI; + MachineInstr &InstToDelete = *DI; ++DI; - InstToDelete->eraseFromParent(); + InstToDelete.eraseFromParent(); } } } @@ -390,11 +406,11 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, - DebugLoc DL) const { +unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL) const { ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); int BOpc = !AFI->isThumbFunction() ? ARM::B : (AFI->isThumb2Function() ? 
ARM::t2B : ARM::tB); @@ -438,10 +454,10 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { - if (MI->isBundle()) { - MachineBasicBlock::const_instr_iterator I = MI->getIterator(); - MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); +bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { + if (MI.isBundle()) { + MachineBasicBlock::const_instr_iterator I = MI.getIterator(); + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { int PIdx = I->findFirstPredOperandIdx(); if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) @@ -450,26 +466,26 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { return false; } - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; + int PIdx = MI.findFirstPredOperandIdx(); + return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; } -bool ARMBaseInstrInfo:: -PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const { - unsigned Opc = MI->getOpcode(); +bool ARMBaseInstrInfo::PredicateInstruction( + MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { + unsigned Opc = MI.getOpcode(); if (isUncondBranchOpcode(Opc)) { - MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); - MachineInstrBuilder(*MI->getParent()->getParent(), MI) + MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); return true; } - int PIdx = MI->findFirstPredOperandIdx(); + int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); + MachineOperand &PMO = MI.getOperand(PIdx); PMO.setImm(Pred[0].getImm()); - MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); return true; } return false; @@ -501,11 +517,11 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, } } -bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { +bool ARMBaseInstrInfo::DefinesPredicate( + MachineInstr &MI, std::vector<MachineOperand> &Pred) const { bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { Pred.push_back(MO); @@ -555,21 +571,21 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. 
-bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - if (!MI->isPredicable()) +bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { + if (!MI.isPredicable()) return false; - if (!isEligibleForITBlock(MI)) + if (!isEligibleForITBlock(&MI)) return false; ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) - return isV8EligibleForIT(MI); + return isV8EligibleForIT(&MI); } else { // non-Thumb - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) return false; } @@ -594,19 +610,19 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { /// GetInstSize - Return the size of the specified MachineInstr. /// -unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MachineBasicBlock &MBB = *MI->getParent(); +unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const { + const MachineBasicBlock &MBB = *MI.getParent(); const MachineFunction *MF = MBB.getParent(); const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (MCID.getSize()) return MCID.getSize(); // If this machine instr is an inline asm, measure it. - if (MI->getOpcode() == ARM::INLINEASM) - return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); - unsigned Opc = MI->getOpcode(); + if (MI.getOpcode() == ARM::INLINEASM) + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); + unsigned Opc = MI.getOpcode(); switch (Opc) { default: // pseudo-instruction sizes are zero. @@ -628,11 +644,13 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::JUMPTABLE_TBH: // If this machine instr is a constant pool entry, its size is recorded as // operand #2. 
- return MI->getOperand(2).getImm(); + return MI.getOperand(2).getImm(); case ARM::Int_eh_sjlj_longjmp: return 16; case ARM::tInt_eh_sjlj_longjmp: return 10; + case ARM::tInt_WIN_eh_sjlj_longjmp: + return 12; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: return 20; @@ -641,17 +659,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2Int_eh_sjlj_setjmp_nofp: return 12; case ARM::SPACE: - return MI->getOperand(1).getImm(); + return MI.getOperand(1).getImm(); } } -unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { +unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI->getIterator(); - MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = MI.getIterator(); + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { assert(!I->isBundle() && "No nested bundle!"); - Size += GetInstSizeInBytes(&*I); + Size += GetInstSizeInBytes(*I); } return Size; } @@ -700,9 +718,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, } void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); @@ -976,20 +994,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -unsigned -ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { default: break; case ARM::STRrs: case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 
- if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && + MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && + MI.getOperand(3).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; case ARM::STRi12: @@ -997,27 +1012,24 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case ARM::tSTRspi: case ARM::VSTRD: case ARM::VSTRS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; case ARM::VST1q64: case ARM::VST1d64TPseudo: case ARM::VST1d64QPseudo: - if (MI->getOperand(0).isFI() && - MI->getOperand(2).getSubReg() == 0) { - FrameIndex = MI->getOperand(0).getIndex(); - return MI->getOperand(2).getReg(); + if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) { + FrameIndex = MI.getOperand(0).getIndex(); + return MI.getOperand(2).getReg(); } break; case ARM::VSTMQIA: - if (MI->getOperand(1).isFI() && - MI->getOperand(0).getSubReg() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; } @@ -1025,10 +1037,10 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, +unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); + return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); } void ARMBaseInstrInfo:: @@ -1164,20 +1176,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -unsigned -ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { default: break; case ARM::LDRrs: case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 
- if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && + MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && + MI.getOperand(3).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; case ARM::LDRi12: @@ -1185,27 +1194,24 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case ARM::tLDRspi: case ARM::VLDRD: case ARM::VLDRS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; case ARM::VLD1q64: case ARM::VLD1d64TPseudo: case ARM::VLD1d64QPseudo: - if (MI->getOperand(1).isFI() && - MI->getOperand(0).getSubReg() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; case ARM::VLDMQIA: - if (MI->getOperand(1).isFI() && - MI->getOperand(0).getSubReg() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); + if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); } break; } @@ -1213,20 +1219,19 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const { +unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); + return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } /// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD /// depending on whether the result is used. 
-void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const { +void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { bool isThumb1 = Subtarget.isThumb1Only(); bool isThumb2 = Subtarget.isThumb2(); const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); - MachineInstr *MI = MBBI; DebugLoc dl = MI->getDebugLoc(); MachineBasicBlock *BB = MI->getParent(); @@ -1269,24 +1274,20 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const { STM.addReg(Reg, RegState::Kill); } - BB->erase(MBBI); + BB->erase(MI); } -bool -ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - MachineFunction &MF = *MI->getParent()->getParent(); - Reloc::Model RM = MF.getTarget().getRelocationModel(); - - if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { +bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && "LOAD_STACK_GUARD currently supported only for MachO."); - expandLoadStackGuard(MI, RM); - MI->getParent()->erase(MI); + expandLoadStackGuard(MI); + MI.getParent()->erase(MI); return true; } - if (MI->getOpcode() == ARM::MEMCPY) { + if (MI.getOpcode() == ARM::MEMCPY) { expandMEMCPY(MI); return true; } @@ -1295,14 +1296,13 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || - Subtarget.isFPOnlySP()) + if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats // when using NEON v2f32 instructions for f32 arithmetic. - unsigned DstRegS = MI->getOperand(0).getReg(); - unsigned SrcRegS = MI->getOperand(1).getReg(); + unsigned DstRegS = MI.getOperand(0).getReg(); + unsigned SrcRegS = MI.getOperand(1).getReg(); if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) return false; @@ -1317,44 +1317,44 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only // legal if the COPY already defines the full DstRegD, and it isn't a // sub-register insertion. - if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) + if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI)) return false; // A dead copy shouldn't show up here, but reject it just in case. - if (MI->getOperand(0).isDead()) + if (MI.getOperand(0).isDead()) return false; // All clear, widen the COPY. - DEBUG(dbgs() << "widening: " << *MI); - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + DEBUG(dbgs() << "widening: " << MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg // or some other super-register. - int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); + int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); if (ImpDefIdx != -1) - MI->RemoveOperand(ImpDefIdx); + MI.RemoveOperand(ImpDefIdx); // Change the opcode and operands. 
- MI->setDesc(get(ARM::VMOVD)); - MI->getOperand(0).setReg(DstRegD); - MI->getOperand(1).setReg(SrcRegD); + MI.setDesc(get(ARM::VMOVD)); + MI.getOperand(0).setReg(DstRegD); + MI.getOperand(1).setReg(SrcRegD); AddDefaultPred(MIB); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we // are reading an undefined value from SrcRegD, but a proper value from // SrcRegS. - MI->getOperand(1).setIsUndef(); + MI.getOperand(1).setIsUndef(); MIB.addReg(SrcRegS, RegState::Implicit); // SrcRegD may actually contain an unrelated value in the ssub_1 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. - if (MI->getOperand(1).isKill()) { - MI->getOperand(1).setIsKill(false); - MI->addRegisterKilled(SrcRegS, TRI, true); + if (MI.getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(false); + MI.addRegisterKilled(SrcRegS, TRI, true); } - DEBUG(dbgs() << "replaced by: " << *MI); + DEBUG(dbgs() << "replaced by: " << MI); return true; } @@ -1403,54 +1403,54 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { return PCLabelId; } -void ARMBaseInstrInfo:: -reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo &TRI) const { - unsigned Opcode = Orig->getOpcode(); +void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, + const MachineInstr &Orig, + const TargetRegisterInfo &TRI) const { + unsigned Opcode = Orig.getOpcode(); switch (Opcode) { default: { - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); + MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); break; } case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { MachineFunction &MF = *MBB.getParent(); - unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned CPI = Orig.getOperand(1).getIndex(); unsigned PCLabelId = duplicateCPV(MF, CPI); - MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), - DestReg) - .addConstantPoolIndex(CPI).addImm(PCLabelId); - MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + MachineInstrBuilder MIB = + BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg) + .addConstantPoolIndex(CPI) + .addImm(PCLabelId); + MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end()); break; } } } -MachineInstr * -ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { +MachineInstr *ARMBaseInstrInfo::duplicate(MachineInstr &Orig, + MachineFunction &MF) const { MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); - switch(Orig->getOpcode()) { + switch (Orig.getOpcode()) { case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { - unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned CPI = Orig.getOperand(1).getIndex(); unsigned PCLabelId = duplicateCPV(MF, CPI); - Orig->getOperand(1).setIndex(CPI); - Orig->getOperand(2).setImm(PCLabelId); + Orig.getOperand(1).setIndex(CPI); + Orig.getOperand(2).setImm(PCLabelId); break; } } return MI; } -bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1, +bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, + const MachineInstr &MI1, const MachineRegisterInfo *MRI) const { - unsigned Opcode = 
MI0->getOpcode(); + unsigned Opcode = MI0.getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || @@ -1461,13 +1461,13 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || Opcode == ARM::t2MOV_ga_pcrel) { - if (MI1->getOpcode() != Opcode) + if (MI1.getOpcode() != Opcode) return false; - if (MI0->getNumOperands() != MI1->getNumOperands()) + if (MI0.getNumOperands() != MI1.getNumOperands()) return false; - const MachineOperand &MO0 = MI0->getOperand(1); - const MachineOperand &MO1 = MI1->getOperand(1); + const MachineOperand &MO0 = MI0.getOperand(1); + const MachineOperand &MO1 = MI1.getOperand(1); if (MO0.getOffset() != MO1.getOffset()) return false; @@ -1480,7 +1480,7 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); - const MachineFunction *MF = MI0->getParent()->getParent(); + const MachineFunction *MF = MI0.getParent()->getParent(); const MachineConstantPool *MCP = MF->getConstantPool(); int CPI0 = MO0.getIndex(); int CPI1 = MO1.getIndex(); @@ -1499,13 +1499,13 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, } return false; } else if (Opcode == ARM::PICLDR) { - if (MI1->getOpcode() != Opcode) + if (MI1.getOpcode() != Opcode) return false; - if (MI0->getNumOperands() != MI1->getNumOperands()) + if (MI0.getNumOperands() != MI1.getNumOperands()) return false; - unsigned Addr0 = MI0->getOperand(1).getReg(); - unsigned Addr1 = MI1->getOperand(1).getReg(); + unsigned Addr0 = MI0.getOperand(1).getReg(); + unsigned Addr1 = MI1.getOperand(1).getReg(); if (Addr0 != Addr1) { if (!MRI || !TargetRegisterInfo::isVirtualRegister(Addr0) || @@ -1517,21 +1517,21 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, MachineInstr *Def1 = MRI->getVRegDef(Addr1); // Check if the loaded value, e.g. a constantpool of a global address, are // the same. - if (!produceSameValue(Def0, Def1, MRI)) + if (!produceSameValue(*Def0, *Def1, MRI)) return false; } - for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { + for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg - const MachineOperand &MO0 = MI0->getOperand(i); - const MachineOperand &MO1 = MI1->getOperand(i); + const MachineOperand &MO0 = MI0.getOperand(i); + const MachineOperand &MO1 = MI1.getOperand(i); if (!MO0.isIdenticalTo(MO1)) return false; } return true; } - return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); + return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to @@ -1653,7 +1653,7 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, return true; } -bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, +bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // Debug info is never a scheduling boundary. It's necessary to be explicit @@ -1662,11 +1662,11 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // considered a scheduling hazard, which is wrong. It should be the actual // instruction preceding the dbg_value instruction(s), just like it is // when debug info is not present. - if (MI->isDebugValue()) + if (MI.isDebugValue()) return false; // Terminators and labels can't be scheduled around. 
- if (MI->isTerminator() || MI->isPosition()) + if (MI.isTerminator() || MI.isPosition()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1690,7 +1690,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // Calls don't actually change the stack pointer, even if they have imp-defs. // No ARM calling conventions change the stack pointer. (X86 calling // conventions sometimes do). - if (!MI->isCall() && MI->definesRegister(ARM::SP)) + if (!MI.isCall() && MI.definesRegister(ARM::SP)) return true; return false; @@ -1718,7 +1718,7 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, CmpMI->getOpcode() == ARM::t2CMPri) { unsigned Reg = CmpMI->getOperand(0).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg); + ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg); if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) return false; @@ -1765,24 +1765,24 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const { - // Reduce false anti-dependencies to let Swift's out-of-order execution + // Reduce false anti-dependencies to let the target's out-of-order execution // engine do its thing. - return Subtarget.isSwift(); + return Subtarget.isProfitableToUnpredicate(); } /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. -ARMCC::CondCodes -llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { - int PIdx = MI->findFirstPredOperandIdx(); +ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, + unsigned &PredReg) { + int PIdx = MI.findFirstPredOperandIdx(); if (PIdx == -1) { PredReg = 0; return ARMCC::AL; } - PredReg = MI->getOperand(PIdx+1).getReg(); - return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); + PredReg = MI.getOperand(PIdx+1).getReg(); + return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); } @@ -1797,11 +1797,11 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } -MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI, +MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case ARM::MOVCCr: case ARM::t2MOVCCr: { // MOVCC can be commuted by inverting the condition. @@ -1810,13 +1810,14 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI, // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) return nullptr; - MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); - if (!MI) + MachineInstr *CommutedMI = + TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); + if (!CommutedMI) return nullptr; // After swapping the MOVCC operands, also invert the condition. 
- MI->getOperand(MI->findFirstPredOperandIdx()) - .setImm(ARMCC::getOppositeCondition(CC)); - return MI; + CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx()) + .setImm(ARMCC::getOppositeCondition(CC)); + return CommutedMI; } } return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); @@ -1860,11 +1861,11 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, return MI; } -bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, +bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const { - assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); // MOVCC operands: // 0: Def. @@ -1874,38 +1875,38 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, // 4: CPSR use. TrueOp = 1; FalseOp = 2; - Cond.push_back(MI->getOperand(3)); - Cond.push_back(MI->getOperand(4)); + Cond.push_back(MI.getOperand(3)); + Cond.push_back(MI.getOperand(4)); // We can always fold a def. Optimizable = true; return false; } MachineInstr * -ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, +ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &SeenMIs, bool PreferFalse) const { - assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) - DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this); if (!DefMI) return nullptr; // Find new register class to use. - MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); - unsigned DestReg = MI->getOperand(0).getReg(); + MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1); + unsigned DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. - MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - DefMI->getDesc(), DestReg); + MachineInstrBuilder NewMI = + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1913,12 +1914,12 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) NewMI.addOperand(DefMI->getOperand(i)); - unsigned CondCode = MI->getOperand(3).getImm(); + unsigned CondCode = MI.getOperand(3).getImm(); if (Invert) NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); else NewMI.addImm(CondCode); - NewMI.addOperand(MI->getOperand(4)); + NewMI.addOperand(MI.getOperand(4)); // DefMI is not the -S version that sets CPSR, so add an optional %noreg. if (NewMI->hasOptionalDef()) @@ -1940,7 +1941,7 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // DefMI would be invalid when tranferred inside the loop. 
Checking for a // loop is expensive, but at least remove kill flags if they are in different // BBs. - if (DefMI->getParent() != MI->getParent()) + if (DefMI->getParent() != MI.getParent()) NewMI->clearKillInfo(); // The caller will erase MI, but not DefMI. @@ -1994,10 +1995,12 @@ unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { } void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - unsigned DestReg, unsigned BaseReg, int NumBytes, - ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII, unsigned MIFlags) { + MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, + unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII, + unsigned MIFlags) { if (NumBytes == 0 && DestReg != BaseReg) { BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) .addReg(BaseReg, RegState::Kill) @@ -2281,30 +2284,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. -bool ARMBaseInstrInfo:: -analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, - int &CmpMask, int &CmpValue) const { - switch (MI->getOpcode()) { +bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const { + switch (MI.getOpcode()) { default: break; case ARM::CMPri: case ARM::t2CMPri: - SrcReg = MI->getOperand(0).getReg(); + SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; CmpMask = ~0; - CmpValue = MI->getOperand(1).getImm(); + CmpValue = MI.getOperand(1).getImm(); return true; case ARM::CMPrr: case ARM::t2CMPrr: - SrcReg = MI->getOperand(0).getReg(); - SrcReg2 = MI->getOperand(1).getReg(); + SrcReg = MI.getOperand(0).getReg(); + SrcReg2 = MI.getOperand(1).getReg(); CmpMask = ~0; CmpValue = 0; return true; case ARM::TSTri: case ARM::t2TSTri: - SrcReg = MI->getOperand(0).getReg(); + SrcReg = MI.getOperand(0).getReg(); SrcReg2 = 0; - CmpMask = MI->getOperand(1).getImm(); + CmpMask = MI.getOperand(1).getImm(); CmpValue = 0; return true; } @@ -2385,25 +2388,25 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the /// condition code of instructions which use the flags. -bool ARMBaseInstrInfo:: -optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, - int CmpMask, int CmpValue, - const MachineRegisterInfo *MRI) const { +bool ARMBaseInstrInfo::optimizeCompareInstr( + MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, + int CmpValue, const MachineRegisterInfo *MRI) const { // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; // Masked compares sometimes use the same register as the corresponding 'and'. 
if (CmpMask != ~0) { - if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) { MI = nullptr; for (MachineRegisterInfo::use_instr_iterator UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); UI != UE; ++UI) { - if (UI->getParent() != CmpInstr->getParent()) continue; + if (UI->getParent() != CmpInstr.getParent()) + continue; MachineInstr *PotentialAND = &*UI; if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || - isPredicated(PotentialAND)) + isPredicated(*PotentialAND)) continue; MI = PotentialAND; break; @@ -2414,7 +2417,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Get ready to iterate backward from CmpInstr. MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); + B = CmpInstr.getParent()->begin(); // Early exit if CmpInstr is at the beginning of the BB. if (I == B) return false; @@ -2427,13 +2430,13 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = nullptr; - else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { + else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. // Thus we cannot return here. - if (CmpInstr->getOpcode() == ARM::CMPri || - CmpInstr->getOpcode() == ARM::t2CMPri) + if (CmpInstr.getOpcode() == ARM::CMPri || + CmpInstr.getOpcode() == ARM::t2CMPri) MI = nullptr; else return false; @@ -2453,7 +2456,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return false; // Check whether CmpInstr can be made redundant by the current instruction. - if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { + if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { Sub = &*I; break; } @@ -2471,7 +2474,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, if (!MI) MI = Sub; // We can't use a predicated instruction - it doesn't always write the flags. - if (isPredicated(MI)) + if (isPredicated(*MI)) return false; switch (MI->getOpcode()) { @@ -2519,7 +2522,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, OperandsToUpdate; bool isSafe = false; I = CmpInstr; - E = CmpInstr->getParent()->end(); + E = CmpInstr.getParent()->end(); while (!isSafe && ++I != E) { const MachineInstr &Instr = *I; for (unsigned IO = 0, EO = Instr.getNumOperands(); @@ -2608,7 +2611,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If CPSR is not killed nor re-defined, we should check whether it is // live-out. If it is live-out, do not optimize. if (!isSafe) { - MachineBasicBlock *MBB = CmpInstr->getParent(); + MachineBasicBlock *MBB = CmpInstr.getParent(); for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) if ((*SI)->isLiveIn(ARM::CPSR)) @@ -2618,8 +2621,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Toggle the optional operand to CPSR. 
MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); - assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); - CmpInstr->eraseFromParent(); + assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); + CmpInstr.eraseFromParent(); // Modify the condition code of operands in OperandsToUpdate. // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to @@ -2633,42 +2636,42 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return false; } -bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, - MachineInstr *DefMI, unsigned Reg, +bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { // Fold large immediates into add, sub, or, xor. - unsigned DefOpc = DefMI->getOpcode(); + unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) return false; - if (!DefMI->getOperand(1).isImm()) + if (!DefMI.getOperand(1).isImm()) // Could be t2MOVi32imm <ga:xx> return false; if (!MRI->hasOneNonDBGUse(Reg)) return false; - const MCInstrDesc &DefMCID = DefMI->getDesc(); + const MCInstrDesc &DefMCID = DefMI.getDesc(); if (DefMCID.hasOptionalDef()) { unsigned NumOps = DefMCID.getNumOperands(); - const MachineOperand &MO = DefMI->getOperand(NumOps-1); + const MachineOperand &MO = DefMI.getOperand(NumOps - 1); if (MO.getReg() == ARM::CPSR && !MO.isDead()) // If DefMI defines CPSR and it is not dead, it's obviously not safe // to delete DefMI. return false; } - const MCInstrDesc &UseMCID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI.getDesc(); if (UseMCID.hasOptionalDef()) { unsigned NumOps = UseMCID.getNumOperands(); - if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) + if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) // If the instruction sets the flag, do not attempt this optimization // since it may change the semantics of the code. return false; } - unsigned UseOpc = UseMI->getOpcode(); + unsigned UseOpc = UseMI.getOpcode(); unsigned NewUseOpc = 0; - uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); + uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); uint32_t SOImmValV1 = 0, SOImmValV2 = 0; bool Commute = false; switch (UseOpc) { @@ -2681,17 +2684,27 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, case ARM::t2ADDrr: case ARM::t2ORRrr: case ARM::t2EORrr: { - Commute = UseMI->getOperand(2).getReg() != Reg; + Commute = UseMI.getOperand(2).getReg() != Reg; switch (UseOpc) { default: break; + case ARM::ADDrr: case ARM::SUBrr: { - if (Commute) + if (UseOpc == ARM::SUBrr && Commute) + return false; + + // ADD/SUB are special because they're essentially the same operation, so + // we can handle a larger range of immediates. + if (ARM_AM::isSOImmTwoPartVal(ImmVal)) + NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; + else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { + ImmVal = -ImmVal; + NewUseOpc = UseOpc == ARM::ADDrr ? 
ARM::SUBri : ARM::ADDri; + } else return false; - ImmVal = -ImmVal; - NewUseOpc = ARM::SUBri; - // Fallthrough + SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + break; } - case ARM::ADDrr: case ARM::ORRrr: case ARM::EORrr: { if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) @@ -2700,20 +2713,29 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); switch (UseOpc) { default: break; - case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; case ARM::EORrr: NewUseOpc = ARM::EORri; break; } break; } + case ARM::t2ADDrr: case ARM::t2SUBrr: { - if (Commute) + if (UseOpc == ARM::t2SUBrr && Commute) return false; - ImmVal = -ImmVal; - NewUseOpc = ARM::t2SUBri; - // Fallthrough + + // ADD/SUB are special because they're essentially the same operation, so + // we can handle a larger range of immediates. + if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) + NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri; + else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { + ImmVal = -ImmVal; + NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri; + } else + return false; + SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); + break; } - case ARM::t2ADDrr: case ARM::t2ORRrr: case ARM::t2EORrr: { if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) @@ -2722,7 +2744,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); switch (UseOpc) { default: break; - case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; } @@ -2733,27 +2754,27 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, } unsigned OpIdx = Commute ? 
2 : 1; - unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); - bool isKill = UseMI->getOperand(OpIdx).isKill(); + unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); + bool isKill = UseMI.getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); - AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), - UseMI, UseMI->getDebugLoc(), - get(NewUseOpc), NewReg) - .addReg(Reg1, getKillRegState(isKill)) - .addImm(SOImmValV1))); - UseMI->setDesc(get(NewUseOpc)); - UseMI->getOperand(1).setReg(NewReg); - UseMI->getOperand(1).setIsKill(); - UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); - DefMI->eraseFromParent(); + AddDefaultCC( + AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), + get(NewUseOpc), NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1))); + UseMI.setDesc(get(NewUseOpc)); + UseMI.getOperand(1).setReg(NewReg); + UseMI.getOperand(1).setIsKill(); + UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); + DefMI.eraseFromParent(); return true; } static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, - const MachineInstr *MI) { - switch (MI->getOpcode()) { + const MachineInstr &MI) { + switch (MI.getOpcode()) { default: { - const MCInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); assert(UOps >= 0 && "bad # UOps"); return UOps; @@ -2763,7 +2784,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRBrs: case ARM::STRrs: case ARM::STRBrs: { - unsigned ShOpVal = MI->getOperand(3).getImm(); + unsigned ShOpVal = MI.getOperand(3).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -2776,10 +2797,10 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRH: case ARM::STRH: { - if (!MI->getOperand(2).getReg()) + if (!MI.getOperand(2).getReg()) return 1; - unsigned ShOpVal = MI->getOperand(3).getImm(); + unsigned ShOpVal = MI.getOperand(3).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -2792,22 +2813,22 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB: case ARM::LDRSH: - return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; + return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2; case ARM::LDRSB_POST: case ARM::LDRSH_POST: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 
4 : 3; } case ARM::LDR_PRE_REG: case ARM::LDRB_PRE_REG: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); if (Rt == Rm) return 3; - unsigned ShOpVal = MI->getOperand(4).getImm(); + unsigned ShOpVal = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -2820,7 +2841,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::STR_PRE_REG: case ARM::STRB_PRE_REG: { - unsigned ShOpVal = MI->getOperand(4).getImm(); + unsigned ShOpVal = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -2833,21 +2854,20 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRH_PRE: case ARM::STRH_PRE: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); if (!Rm) return 2; if (Rt == Rm) return 3; - return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) - ? 3 : 2; + return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2; } case ARM::LDR_POST_REG: case ARM::LDRB_POST_REG: case ARM::LDRH_POST: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 3 : 2; } @@ -2866,13 +2886,13 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB_PRE: case ARM::LDRSH_PRE: { - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); if (Rm == 0) return 3; - unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); if (Rt == Rm) return 4; - unsigned ShOpVal = MI->getOperand(4).getImm(); + unsigned ShOpVal = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -2884,18 +2904,20 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::LDRD: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rn = MI->getOperand(2).getReg(); - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rn = MI.getOperand(2).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); if (Rm) - return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 + : 3; return (Rt == Rn) ? 3 : 2; } case ARM::STRD: { - unsigned Rm = MI->getOperand(3).getReg(); + unsigned Rm = MI.getOperand(3).getReg(); if (Rm) - return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 
4 + : 3; return 2; } @@ -2908,24 +2930,26 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 4; case ARM::LDRD_PRE: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rn = MI->getOperand(3).getReg(); - unsigned Rm = MI->getOperand(4).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rn = MI.getOperand(3).getReg(); + unsigned Rm = MI.getOperand(4).getReg(); if (Rm) - return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 + : 4; return (Rt == Rn) ? 4 : 3; } case ARM::t2LDRD_PRE: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rn = MI->getOperand(3).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rn = MI.getOperand(3).getReg(); return (Rt == Rn) ? 4 : 3; } case ARM::STRD_PRE: { - unsigned Rm = MI->getOperand(4).getReg(); + unsigned Rm = MI.getOperand(4).getReg(); if (Rm) - return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 + : 4; return 3; } @@ -2953,8 +2977,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 2; case ARM::t2LDRDi8: { - unsigned Rt = MI->getOperand(0).getReg(); - unsigned Rn = MI->getOperand(2).getReg(); + unsigned Rt = MI.getOperand(0).getReg(); + unsigned Rn = MI.getOperand(2).getReg(); return (Rt == Rn) ? 3 : 2; } @@ -2994,22 +3018,61 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, // sizes during MC lowering. That target hook should be local to MC lowering // because we can't ensure that it is aware of other MI forms. Doing this will // ensure that MachineMemOperands are correctly propagated through all passes. -unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const { +unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { unsigned Size = 0; - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) { + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), + E = MI.memoperands_end(); + I != E; ++I) { Size += (*I)->getSize(); } return Size / 4; } -unsigned -ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const { +static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, + unsigned NumRegs) { + unsigned UOps = 1 + NumRegs; // 1 for address computation. + switch (Opc) { + default: + break; + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + ++UOps; // One for base register writeback. + break; + case ARM::LDMIA_RET: + case ARM::tPOP_RET: + case ARM::t2LDMIA_RET: + UOps += 2; // One for base reg wb, one for write to pc. 
+ break; + } + return UOps; +} + +unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr &MI) const { if (!ItinData || ItinData->isEmpty()) return 1; - const MCInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned Class = Desc.getSchedClass(); int ItinUOps = ItinData->getNumMicroOps(Class); if (ItinUOps >= 0) { @@ -3019,7 +3082,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return ItinUOps; } - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); switch (Opc) { default: llvm_unreachable("Unexpected multi-uops instruction!"); @@ -3049,7 +3112,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::VSTMSIA: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: { - unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); + unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); return (NumRegs / 2) + (NumRegs % 2) + 1; } @@ -3085,66 +3148,36 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMDB: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { - unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; - if (Subtarget.isSwift()) { - int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. - switch (Opc) { - default: break; - case ARM::VLDMDIA_UPD: - case ARM::VLDMDDB_UPD: - case ARM::VLDMSIA_UPD: - case ARM::VLDMSDB_UPD: - case ARM::VSTMDIA_UPD: - case ARM::VSTMDDB_UPD: - case ARM::VSTMSIA_UPD: - case ARM::VSTMSDB_UPD: - case ARM::LDMIA_UPD: - case ARM::LDMDA_UPD: - case ARM::LDMDB_UPD: - case ARM::LDMIB_UPD: - case ARM::STMIA_UPD: - case ARM::STMDA_UPD: - case ARM::STMDB_UPD: - case ARM::STMIB_UPD: - case ARM::tLDMIA_UPD: - case ARM::tSTMIA_UPD: - case ARM::t2LDMIA_UPD: - case ARM::t2LDMDB_UPD: - case ARM::t2STMIA_UPD: - case ARM::t2STMDB_UPD: - ++UOps; // One for base register writeback. - break; - case ARM::LDMIA_RET: - case ARM::tPOP_RET: - case ARM::t2LDMIA_RET: - UOps += 2; // One for base reg wb, one for write to pc. - break; - } - return UOps; - } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { + unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; + switch (Subtarget.getLdStMultipleTiming()) { + case ARMSubtarget::SingleIssuePlusExtras: + return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); + case ARMSubtarget::SingleIssue: + // Assume the worst. + return NumRegs; + case ARMSubtarget::DoubleIssue: { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - int A8UOps = (NumRegs / 2); + unsigned UOps = (NumRegs / 2); if (NumRegs % 2) - ++A8UOps; - return A8UOps; - } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { - int A9UOps = (NumRegs / 2); + ++UOps; + return UOps; + } + case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { + unsigned UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. - if ((NumRegs % 2) || - !MI->hasOneMemOperand() || - (*MI->memoperands_begin())->getAlignment() < 8) - ++A9UOps; - return A9UOps; - } else { - // Assume the worst. 
- return NumRegs; + if ((NumRegs % 2) || !MI.hasOneMemOperand() || + (*MI.memoperands_begin())->getAlignment() < 8) + ++UOps; + return UOps; + } } } } + llvm_unreachable("Didn't find the number of microops"); } int @@ -3428,13 +3461,13 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, } static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, - const MachineInstr *MI, unsigned Reg, + const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist) { Dist = 0; - MachineBasicBlock::const_instr_iterator II = ++MI->getIterator(); + MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); assert(II->isInsideBundle() && "Empty bundle?"); - MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); // FIXME: This doesn't properly handle multiple uses. int Idx = -1; @@ -3460,17 +3493,17 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, /// itinerary based on the def opcode and alignment. The caller will ensure that /// adjusted latency is at least one cycle. static int adjustDefLatency(const ARMSubtarget &Subtarget, - const MachineInstr *DefMI, - const MCInstrDesc *DefMCID, unsigned DefAlign) { + const MachineInstr &DefMI, + const MCInstrDesc &DefMCID, unsigned DefAlign) { int Adjust = 0; if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefMCID->getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { - unsigned ShOpVal = DefMI->getOperand(3).getImm(); + unsigned ShOpVal = DefMI.getOperand(3).getImm(); unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (ShImm == 0 || (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) @@ -3482,7 +3515,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, case ARM::t2LDRHs: case ARM::t2LDRSHs: { // Thumb2 mode: lsl only. - unsigned ShAmt = DefMI->getOperand(3).getImm(); + unsigned ShAmt = DefMI.getOperand(3).getImm(); if (ShAmt == 0 || ShAmt == 2) --Adjust; break; @@ -3491,11 +3524,11 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, } else if (Subtarget.isSwift()) { // FIXME: Properly handle all of the latency adjustments for address // writeback. - switch (DefMCID->getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { - unsigned ShOpVal = DefMI->getOperand(3).getImm(); + unsigned ShOpVal = DefMI.getOperand(3).getImm(); bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (!isSub && @@ -3513,7 +3546,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, case ARM::t2LDRHs: case ARM::t2LDRSHs: { // Thumb2 mode: lsl only. 
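The rewritten getNumMicroOps switch above drops the CPU-name checks (isSwift, isCortexA8/A7, isLikeA9) in favour of a subtarget-declared LdStMultipleTiming property. A compressed sketch of the per-timing formulas it now applies to a load/store-multiple of NumRegs registers, leaving out the SingleIssuePlusExtras case that the new helper handles (enumeration and helper names here are illustrative, not the LLVM ones):

    #include <cstdio>

    enum class LdStTiming { SingleIssue, DoubleIssue, DoubleIssueCheckUnaligned };

    // Micro-ops for an LDM/STM of NumRegs registers, mirroring the switch above.
    unsigned ldmUOps(LdStTiming T, unsigned NumRegs, bool OddOrUnaligned64) {
      switch (T) {
      case LdStTiming::SingleIssue:
        return NumRegs;                    // assume the worst: one uop per register
      case LdStTiming::DoubleIssue:
        return NumRegs / 2 + NumRegs % 2;  // registers issued in pairs: 4 -> 2,2; 5 -> 2,2,1
      case LdStTiming::DoubleIssueCheckUnaligned:
        // An odd register count or a non-64-bit-aligned access costs one
        // extra AGU (address generation) cycle.
        return NumRegs / 2 + (OddOrUnaligned64 ? 1u : 0u);
      }
      return 1;
    }

    int main() {
      std::printf("%u %u\n",
                  ldmUOps(LdStTiming::DoubleIssue, 5, true),                 // 3
                  ldmUOps(LdStTiming::DoubleIssueCheckUnaligned, 4, false)); // 2
    }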
- unsigned ShAmt = DefMI->getOperand(3).getImm(); + unsigned ShAmt = DefMI.getOperand(3).getImm(); if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) Adjust -= 2; break; @@ -3521,8 +3554,8 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, } } - if (DefAlign < 8 && Subtarget.isLikeA9()) { - switch (DefMCID->getOpcode()) { + if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -3637,53 +3670,55 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, return Adjust; } - - -int -ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, - unsigned UseIdx) const { +int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const { // No operand latency. The caller may fall back to getInstrLatency. if (!ItinData || ItinData->isEmpty()) return -1; - const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + const MachineOperand &DefMO = DefMI.getOperand(DefIdx); unsigned Reg = DefMO.getReg(); - const MCInstrDesc *DefMCID = &DefMI->getDesc(); - const MCInstrDesc *UseMCID = &UseMI->getDesc(); + const MachineInstr *ResolvedDefMI = &DefMI; unsigned DefAdj = 0; - if (DefMI->isBundle()) { - DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); - DefMCID = &DefMI->getDesc(); - } - if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || - DefMI->isRegSequence() || DefMI->isImplicitDef()) { + if (DefMI.isBundle()) + ResolvedDefMI = + getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); + if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || + ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { return 1; } + const MachineInstr *ResolvedUseMI = &UseMI; unsigned UseAdj = 0; - if (UseMI->isBundle()) { - unsigned NewUseIdx; - const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, - Reg, NewUseIdx, UseAdj); - if (!NewUseMI) + if (UseMI.isBundle()) { + ResolvedUseMI = + getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); + if (!ResolvedUseMI) return -1; - - UseMI = NewUseMI; - UseIdx = NewUseIdx; - UseMCID = &UseMI->getDesc(); } + return getOperandLatencyImpl( + ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO, + Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); +} + +int ARMBaseInstrInfo::getOperandLatencyImpl( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, + const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, + unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { if (Reg == ARM::CPSR) { - if (DefMI->getOpcode() == ARM::FMSTAT) { + if (DefMI.getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) return Subtarget.isLikeA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. - if (UseMI->isBranch()) + if (UseMI.isBranch()) return 0; // Otherwise it takes the instruction latency (generally one). @@ -3694,7 +3729,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // incur a code size penalty (not able to use the CPSR setting 16-bit // instructions). 
if (Latency > 0 && Subtarget.isThumb2()) { - const MachineFunction *MF = DefMI->getParent()->getParent(); + const MachineFunction *MF = DefMI.getParent()->getParent(); // FIXME: Use Function::optForSize(). if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) --Latency; @@ -3702,17 +3737,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } - if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) + if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit()) return -1; - unsigned DefAlign = DefMI->hasOneMemOperand() - ? (*DefMI->memoperands_begin())->getAlignment() : 0; - unsigned UseAlign = UseMI->hasOneMemOperand() - ? (*UseMI->memoperands_begin())->getAlignment() : 0; + unsigned DefAlign = DefMI.hasOneMemOperand() + ? (*DefMI.memoperands_begin())->getAlignment() + : 0; + unsigned UseAlign = UseMI.hasOneMemOperand() + ? (*UseMI.memoperands_begin())->getAlignment() + : 0; // Get the itinerary's latency if possible, and handle variable_ops. - int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, - *UseMCID, UseIdx, UseAlign); + int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, + UseIdx, UseAlign); // Unable to find operand latency. The caller may resort to getInstrLatency. if (Latency < 0) return Latency; @@ -3746,10 +3783,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isLikeA9() || Subtarget.isSwift()) - return Latency <= 2 ? 1 : Latency - 1; - else - return Latency <= 3 ? 1 : Latency - 2; + int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); + int Threshold = 1 + Adj; + return Latency <= Threshold ? 1 : Latency - Adj; } const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); @@ -3820,7 +3856,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } - if (DefAlign < 8 && Subtarget.isLikeA9()) + if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: @@ -3946,15 +3982,15 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } -unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { - if (MI->isCopyLike() || MI->isInsertSubreg() || - MI->isRegSequence() || MI->isImplicitDef()) +unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { + if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || + MI.isImplicitDef()) return 0; - if (MI->isBundle()) + if (MI.isBundle()) return 0; - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { // When predicated, CPSR is an additional source operand for CPSR updating @@ -3965,26 +4001,26 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { } unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost) const { - if (MI->isCopyLike() || MI->isInsertSubreg() || - MI->isRegSequence() || MI->isImplicitDef()) + if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || + MI.isImplicitDef()) return 1; // An instruction scheduler typically runs on unbundled instructions, however // other passes may query the latency of a bundled instruction. 
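One hunk above folds the two hard-coded pre-ISel latency adjustments (the A9/Swift path versus the default) into a single value supplied by getPreISelOperandLatencyAdjustment. The arithmetic it now performs, written out as a plain function (a sketch; the accessor name comes from the patch, the rest is illustrative):

    // Operand latency seen before instruction selection, clamped so cheap
    // results still cost one cycle. Adj == 1 reproduces the old A9/Swift
    // behaviour (<= 2 -> 1, else Latency - 1); Adj == 2 reproduces the old
    // default (<= 3 -> 1, else Latency - 2).
    int preISelAdjustedLatency(int Latency, int Adj) {
      int Threshold = 1 + Adj;
      return Latency <= Threshold ? 1 : Latency - Adj;
    }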
- if (MI->isBundle()) { + if (MI.isBundle()) { unsigned Latency = 0; - MachineBasicBlock::const_instr_iterator I = MI->getIterator(); - MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = MI.getIterator(); + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { if (I->getOpcode() != ARM::t2IT) - Latency += getInstrLatency(ItinData, &*I, PredCost); + Latency += getInstrLatency(ItinData, *I, PredCost); } return Latency; } - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. @@ -3993,7 +4029,7 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // Be sure to call getStageLatency for an empty itinerary in case it has a // valid MinLatency property. if (!ItinData) - return MI->mayLoad() ? 3 : 1; + return MI.mayLoad() ? 3 : 1; unsigned Class = MCID.getSchedClass(); @@ -4005,9 +4041,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, unsigned Latency = ItinData->getStageLatency(Class); // Adjust for dynamic def-side opcode variants not captured by the itinerary. - unsigned DefAlign = MI->hasOneMemOperand() - ? (*MI->memoperands_begin())->getAlignment() : 0; - int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign); + unsigned DefAlign = + MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0; + int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); if (Adj >= 0 || (int)Latency > -Adj) { return Latency + Adj; } @@ -4032,46 +4068,46 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } } -bool ARMBaseInstrInfo:: -hasHighOperandLatency(const TargetSchedModel &SchedModel, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; - unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; - if (Subtarget.isCortexA8() && +bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, + const MachineRegisterInfo *MRI, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const { + unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; + unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; + if (Subtarget.nonpipelinedVFP() && (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) - // CortexA8 VFP instructions are not pipelined. return true; // Hoist VFP / NEON instructions with 4 or higher latency. 
- unsigned Latency - = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx); + unsigned Latency = + SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); if (Latency <= 3) return false; return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; } -bool ARMBaseInstrInfo:: -hasLowDefLatency(const TargetSchedModel &SchedModel, - const MachineInstr *DefMI, unsigned DefIdx) const { +bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, + const MachineInstr &DefMI, + unsigned DefIdx) const { const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); if (!ItinData || ItinData->isEmpty()) return false; - unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; if (DDomain == ARMII::DomainGeneral) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned DefClass = DefMI.getDesc().getSchedClass(); int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); return (DefCycle != -1 && DefCycle <= 2); } return false; } -bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, +bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { - if (convertAddSubFlagsOpcode(MI->getOpcode())) { + if (convertAddSubFlagsOpcode(MI.getOpcode())) { ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; return false; } @@ -4082,8 +4118,7 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, // sequence is needed for other targets. void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, - unsigned LoadOpc, - Reloc::Model RM) const { + unsigned LoadOpc) const { MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); unsigned Reg = MI->getOperand(0).getReg(); @@ -4094,12 +4129,12 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); - if (Subtarget.GVIsIndirectSymbol(GV, RM)) { + if (Subtarget.isGVIndirectSymbol(GV)) { MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); MIB.addReg(Reg, RegState::Kill).addImm(0); - unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( - MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4); + MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); MIB.addMemOperand(MMO); AddDefaultPred(MIB); } @@ -4146,24 +4181,24 @@ enum ARMExeDomain { // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h // std::pair<uint16_t, uint16_t> -ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { +ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { // If we don't have access to NEON instructions then we won't be able // to swizzle anything to the NEON domain. Check to make sure. if (Subtarget.hasNEON()) { // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON // if they are not predicated. - if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) + if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); // CortexA9 is particularly picky about mixing the two and wants these // converted. 
- if (Subtarget.isCortexA9() && !isPredicated(MI) && - (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || - MI->getOpcode() == ARM::VMOVS)) + if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && + (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || + MI.getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); } // No other instructions can be swizzled, so just determine their domain. - unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; + unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; if (Domain & ARMII::DomainNEON) return std::make_pair(ExeNEON, 0); @@ -4210,12 +4245,11 @@ static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, /// (including the case where the DPR itself is defined), it should not. /// static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, - MachineInstr *MI, - unsigned DReg, unsigned Lane, - unsigned &ImplicitSReg) { + MachineInstr &MI, unsigned DReg, + unsigned Lane, unsigned &ImplicitSReg) { // If the DPR is defined or used already, the other SPR lane will be chained // correctly, so there is nothing to be done. - if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) { + if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) { ImplicitSReg = 0; return true; } @@ -4224,7 +4258,7 @@ static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, ImplicitSReg = TRI->getSubReg(DReg, (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); MachineBasicBlock::LivenessQueryResult LQR = - MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); + MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); if (LQR == MachineBasicBlock::LQR_Live) return true; @@ -4237,106 +4271,105 @@ static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, return true; } -void -ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { +void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, + unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - switch (MI->getOpcode()) { - default: - llvm_unreachable("cannot handle opcode!"); + switch (MI.getOpcode()) { + default: + llvm_unreachable("cannot handle opcode!"); + break; + case ARM::VMOVD: + if (Domain != ExeNEON) break; - case ARM::VMOVD: - if (Domain != ExeNEON) - break; - // Zap the predicate operands. - assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - // Make sure we've got NEON instructions. - assert(Subtarget.hasNEON() && "VORRd requires NEON"); + // Make sure we've got NEON instructions. 
+ assert(Subtarget.hasNEON() && "VORRd requires NEON"); - // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); - for (unsigned i = MI->getDesc().getNumOperands(); i; --i) - MI->RemoveOperand(i-1); + for (unsigned i = MI.getDesc().getNumOperands(); i; --i) + MI.RemoveOperand(i - 1); - // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) - MI->setDesc(get(ARM::VORRd)); - AddDefaultPred(MIB.addReg(DstReg, RegState::Define) - .addReg(SrcReg) - .addReg(SrcReg)); + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI.setDesc(get(ARM::VORRd)); + AddDefaultPred( + MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg)); + break; + case ARM::VMOVRS: + if (Domain != ExeNEON) break; - case ARM::VMOVRS: - if (Domain != ExeNEON) - break; - assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); - // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); - for (unsigned i = MI->getDesc().getNumOperands(); i; --i) - MI->RemoveOperand(i-1); + for (unsigned i = MI.getDesc().getNumOperands(); i; --i) + MI.RemoveOperand(i - 1); - DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); - // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) - // Note that DSrc has been widened and the other lane may be undef, which - // contaminates the entire register. - MI->setDesc(get(ARM::VGETLNi32)); - AddDefaultPred(MIB.addReg(DstReg, RegState::Define) - .addReg(DReg, RegState::Undef) - .addImm(Lane)); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. + MI.setDesc(get(ARM::VGETLNi32)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - // The old source should be an implicit use, otherwise we might think it - // was dead before here. - MIB.addReg(SrcReg, RegState::Implicit); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. 
+ MIB.addReg(SrcReg, RegState::Implicit); + break; + case ARM::VMOVSR: { + if (Domain != ExeNEON) break; - case ARM::VMOVSR: { - if (Domain != ExeNEON) - break; - assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); - // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); - DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); - unsigned ImplicitSReg; - if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) - break; + unsigned ImplicitSReg; + if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) + break; - for (unsigned i = MI->getDesc().getNumOperands(); i; --i) - MI->RemoveOperand(i-1); + for (unsigned i = MI.getDesc().getNumOperands(); i; --i) + MI.RemoveOperand(i - 1); - // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) - // Again DDst may be undefined at the beginning of this instruction. - MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg, RegState::Define) - .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) - .addReg(SrcReg) - .addImm(Lane); - AddDefaultPred(MIB); + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. + MI.setDesc(get(ARM::VSETLNi32)); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); - // The narrower destination must be marked as set to keep previous chains - // in place. - MIB.addReg(DstReg, RegState::Define | RegState::Implicit); - if (ImplicitSReg != 0) - MIB.addReg(ImplicitSReg, RegState::Implicit); - break; + // The narrower destination must be marked as set to keep previous chains + // in place. 
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); + break; } case ARM::VMOVS: { if (Domain != ExeNEON) break; // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); @@ -4346,16 +4379,16 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) break; - for (unsigned i = MI->getDesc().getNumOperands(); i; --i) - MI->RemoveOperand(i-1); + for (unsigned i = MI.getDesc().getNumOperands(); i; --i) + MI.RemoveOperand(i - 1); if (DSrc == DDst) { // Destination can be: // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) - MI->setDesc(get(ARM::VDUPLN32d)); + MI.setDesc(get(ARM::VDUPLN32d)); MIB.addReg(DDst, RegState::Define) - .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) - .addImm(SrcLane); + .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) + .addImm(SrcLane); AddDefaultPred(MIB); // Neither the source or the destination are naturally represented any @@ -4380,18 +4413,18 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Pattern of the MachineInstrs is: // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) MachineInstrBuilder NewMIB; - NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(ARM::VEXTd32), DDst); + NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), + DDst); // On the first instruction, both DSrc and DDst may be <undef> if present. // Specifically when the original instruction didn't have them as an // <imp-use>. unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; - bool CurUndef = !MI->readsRegister(CurReg, TRI); + bool CurUndef = !MI.readsRegister(CurReg, TRI); NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; - CurUndef = !MI->readsRegister(CurReg, TRI); + CurUndef = !MI.readsRegister(CurReg, TRI); NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); NewMIB.addImm(1); @@ -4400,17 +4433,17 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { if (SrcLane == DstLane) NewMIB.addReg(SrcReg, RegState::Implicit); - MI->setDesc(get(ARM::VEXTd32)); + MI.setDesc(get(ARM::VEXTd32)); MIB.addReg(DDst, RegState::Define); // On the second instruction, DDst has definitely been defined above, so // it is not <undef>. DSrc, if present, can be <undef> as above. CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; - CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); MIB.addReg(CurReg, getUndefRegState(CurUndef)); CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; - CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); MIB.addReg(CurReg, getUndefRegState(CurUndef)); MIB.addImm(1); @@ -4446,24 +4479,23 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. // // FCONSTD can be used as a dependency-breaking instruction. 
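The comment block above closes the list of instructions whose writes only partially update a D-register; the hunk that follows reworks how the clearance for that hazard is queried. A minimal illustration of the hazard, and of the FCONSTD trick the pass uses to break it, as a stand-alone C++ snippet: the registers, the harness and the use of inline assembly are assumptions for illustration only, and it needs a VFPv3/NEON target to assemble.

// Illustration only: "vldr s0" writes just lane 0 of d0, so the core sees a
// false dependency on whatever last produced d0. Defining the whole d0 first
// with FCONSTD (vmov.f64 d0, #0.5, encoding 96 as noted in the hunk below)
// severs that dependency, which is what breakPartialRegDependency arranges.
float loadScalar(const float *P) {
  float V;
  asm volatile("vmov.f64 d0, #0.5 \n\t" // dependency-breaking full-width def
               "vldr     s0, [%1] \n\t" // partial write: only lane 0 of d0
               "vmov     %0, s0   \n\t"
               : "=r"(V)
               : "r"(P)
               : "d0", "memory");
  return V;
}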
-unsigned ARMBaseInstrInfo:: -getPartialRegUpdateClearance(const MachineInstr *MI, - unsigned OpNum, - const TargetRegisterInfo *TRI) const { - if (!SwiftPartialUpdateClearance || - !(Subtarget.isSwift() || Subtarget.isCortexA15())) +unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( + const MachineInstr &MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); + if (!PartialUpdateClearance) return 0; assert(TRI && "Need TRI instance"); - const MachineOperand &MO = MI->getOperand(OpNum); + const MachineOperand &MO = MI.getOperand(OpNum); if (MO.readsReg()) return 0; unsigned Reg = MO.getReg(); int UseOp = -1; - switch(MI->getOpcode()) { - // Normal instructions writing only an S-register. + switch (MI.getOpcode()) { + // Normal instructions writing only an S-register. case ARM::VLDRS: case ARM::FCONSTS: case ARM::VMOVSR: @@ -4472,7 +4504,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI, case ARM::VMOVv2i32: case ARM::VMOVv2f32: case ARM::VMOVv1i64: - UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); + UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI); break; // Explicitly reads the dependency. @@ -4485,37 +4517,35 @@ getPartialRegUpdateClearance(const MachineInstr *MI, // If this instruction actually reads a value from Reg, there is no unwanted // dependency. - if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) + if (UseOp != -1 && MI.getOperand(UseOp).readsReg()) return 0; // We must be able to clobber the whole D-reg. if (TargetRegisterInfo::isVirtualRegister(Reg)) { // Virtual register must be a foo:ssub_0<def,undef> operand. - if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) + if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) return 0; } else if (ARM::SPRRegClass.contains(Reg)) { // Physical register: MI must define the full D-reg. unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass); - if (!DReg || !MI->definesRegister(DReg, TRI)) + if (!DReg || !MI.definesRegister(DReg, TRI)) return 0; } // MI has an unwanted D-register dependency. // Avoid defs in the previous N instructrions. - return SwiftPartialUpdateClearance; + return PartialUpdateClearance; } // Break a partial register dependency after getPartialRegUpdateClearance // returned non-zero. -void ARMBaseInstrInfo:: -breakPartialRegDependency(MachineBasicBlock::iterator MI, - unsigned OpNum, - const TargetRegisterInfo *TRI) const { - assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); +void ARMBaseInstrInfo::breakPartialRegDependency( + MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { + assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def"); assert(TRI && "Need TRI instance"); - const MachineOperand &MO = MI->getOperand(OpNum); + const MachineOperand &MO = MI.getOperand(OpNum); unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Can't break virtual register dependencies."); @@ -4528,7 +4558,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, } assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); - assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); + assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. 
The @@ -4538,9 +4568,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // Insert the dependency-breaking FCONSTD before MI. // 96 is the encoding of 0.5, but the actual value doesn't matter here. - AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(ARM::FCONSTD), DReg).addImm(96)); - MI->addRegisterKilled(DReg, TRI, true); + AddDefaultPred( + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) + .addImm(96)); + MI.addRegisterKilled(DReg, TRI, true); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index d80c49494c77..52b0ff17dea2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -36,8 +36,7 @@ protected: explicit ARMBaseInstrInfo(const ARMSubtarget &STI); void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, - unsigned LoadImmOpc, unsigned LoadOpc, - Reloc::Model RM) const; + unsigned LoadImmOpc, unsigned LoadOpc) const; /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. @@ -93,8 +92,7 @@ protected: /// non-commutable pair of operand indices OpIdx1 and OpIdx2. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. - MachineInstr *commuteInstructionImpl(MachineInstr *MI, - bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; @@ -107,7 +105,7 @@ public: virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, + MachineInstr &MI, LiveVariables *LV) const override; virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; @@ -122,49 +120,49 @@ public: const ScheduleDAG *DAG) const override; // Branch analysis. - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify = false) const override; unsigned RemoveBranch(MachineBasicBlock &MBB) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - DebugLoc DL) const override; + const DebugLoc &DL) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; // Predication support. - bool isPredicated(const MachineInstr *MI) const override; + bool isPredicated(const MachineInstr &MI) const override; - ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() + ARMCC::CondCodes getPredicate(const MachineInstr &MI) const { + int PIdx = MI.findFirstPredOperandIdx(); + return PIdx != -1 ? 
(ARMCC::CondCodes)MI.getOperand(PIdx).getImm() : ARMCC::AL; } - bool PredicateInstruction(MachineInstr *MI, - ArrayRef<MachineOperand> Pred) const override; + bool PredicateInstruction(MachineInstr &MI, + ArrayRef<MachineOperand> Pred) const override; bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const override; - bool DefinesPredicate(MachineInstr *MI, + bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; - bool isPredicable(MachineInstr *MI) const override; + bool isPredicable(MachineInstr &MI) const override; /// GetInstSize - Returns the size of the specified MachineInstr. /// - virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; + virtual unsigned GetInstSizeInBytes(const MachineInstr &MI) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, + unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - unsigned isStoreToStackSlot(const MachineInstr *MI, + unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override; - unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override; void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -175,7 +173,7 @@ public: const ARMSubtarget &Subtarget) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -190,21 +188,21 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, + const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override; - MachineInstr *duplicate(MachineInstr *Orig, + MachineInstr *duplicate(MachineInstr &Orig, MachineFunction &MF) const override; const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const; - bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, + bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to @@ -227,7 +225,7 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; - bool isSchedulingBoundary(const MachineInstr *MI, + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; @@ -252,7 +250,7 @@ public: /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. 
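Most of this header hunk, including the analyzeCompare declaration immediately below, is the mechanical migration of TargetInstrInfo hooks from MachineInstr* to MachineInstr&. A hypothetical caller-side sketch of what that migration means in practice; the function and its name are invented for illustration, while the hook signatures are the ones declared in this header.

// Hypothetical caller: iterate by reference and pass MI, not &MI, to the hooks.
static unsigned sizeOfUnpredicatedInstrs(const ARMBaseInstrInfo &TII,
                                         MachineBasicBlock &MBB) {
  unsigned Bytes = 0;
  for (MachineInstr &MI : MBB) {
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) // was getInstrPredicate(&MI, ...)
      Bytes += TII.GetInstSizeInBytes(MI);           // was GetInstSizeInBytes(&MI)
  }
  return Bytes;
}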
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override; @@ -260,30 +258,29 @@ public: /// that we can remove a "comparison with zero"; Remove a redundant CMP /// instruction if the flags can be updated in the same way by an earlier /// instruction such as SUB. - bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const override; - bool analyzeSelect(const MachineInstr *MI, - SmallVectorImpl<MachineOperand> &Cond, - unsigned &TrueOp, unsigned &FalseOp, - bool &Optimizable) const override; + bool analyzeSelect(const MachineInstr &MI, + SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp, + unsigned &FalseOp, bool &Optimizable) const override; - MachineInstr *optimizeSelect(MachineInstr *MI, + MachineInstr *optimizeSelect(MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &SeenMIs, bool) const override; /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. - bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const override; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const override; unsigned getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const override; + const MachineInstr &MI) const override; int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, unsigned UseIdx) const override; int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, @@ -291,19 +288,20 @@ public: /// VFP/NEON execution domains. std::pair<uint16_t, uint16_t> - getExecutionDomain(const MachineInstr *MI) const override; - void setExecutionDomain(MachineInstr *MI, unsigned Domain) const override; + getExecutionDomain(const MachineInstr &MI) const override; + void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override; - unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned, - const TargetRegisterInfo*) const override; - void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, + unsigned + getPartialRegUpdateClearance(const MachineInstr &, unsigned, + const TargetRegisterInfo *) const override; + void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override; /// Get the number of addresses by LDM or VLDM or zero for unknown. 
- unsigned getNumLDMAddresses(const MachineInstr *MI) const; + unsigned getNumLDMAddresses(const MachineInstr &MI) const; private: - unsigned getInstBundleLength(const MachineInstr *MI) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -327,10 +325,17 @@ private: const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; - unsigned getPredicationCost(const MachineInstr *MI) const override; + int getOperandLatencyImpl(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, unsigned DefIdx, + const MCInstrDesc &DefMCID, unsigned DefAdj, + const MachineOperand &DefMO, unsigned Reg, + const MachineInstr &UseMI, unsigned UseIdx, + const MCInstrDesc &UseMCID, unsigned UseAdj) const; + + unsigned getPredicationCost(const MachineInstr &MI) const override; unsigned getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost = nullptr) const override; int getInstrLatency(const InstrItineraryData *ItinData, @@ -338,19 +343,18 @@ private: bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, unsigned UseIdx) const override; bool hasLowDefLatency(const TargetSchedModel &SchedModel, - const MachineInstr *DefMI, + const MachineInstr &DefMI, unsigned DefIdx) const override; /// verifyInstruction - Perform target specific instruction verification. - bool verifyInstruction(const MachineInstr *MI, + bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; - virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, - Reloc::Model RM) const = 0; + virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI) const = 0; void expandMEMCPY(MachineBasicBlock::iterator) const; @@ -447,7 +451,7 @@ static inline bool isPushOpcode(int Opc) { /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. -ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); +ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg); unsigned getMatchingCondBranchOpcode(unsigned Opc); @@ -466,21 +470,24 @@ unsigned convertAddSubFlagsOpcode(unsigned OldOpc); /// instructions to materializea destreg = basereg + immediate in ARM / Thumb2 /// code. 
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - unsigned DestReg, unsigned BaseReg, int NumBytes, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, + unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitT2RegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - unsigned DestReg, unsigned BaseReg, int NumBytes, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, + unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - unsigned DestReg, unsigned BaseReg, - int NumBytes, const TargetInstrInfo &TII, - const ARMBaseRegisterInfo& MRI, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, + unsigned BaseReg, int NumBytes, + const TargetInstrInfo &TII, + const ARMBaseRegisterInfo &MRI, unsigned MIFlags = 0); /// Tries to add registers to the reglist of a given base-updating diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a5207705fc69..aa968efc37d4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -49,12 +49,9 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo() : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { - if (STI.isTargetMachO()) { - if (STI.isTargetDarwin() || STI.isThumb1Only()) - return ARM::R7; - else - return ARM::R11; - } else if (STI.isTargetWindows()) + if (STI.isTargetMachO()) + return ARM::R7; + else if (STI.isTargetWindows()) return ARM::R11; else // ARM EABI return STI.isThumb() ? ARM::R7 : ARM::R11; @@ -63,8 +60,11 @@ static unsigned getFramePointerReg(const ARMSubtarget &STI) { const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>(); + bool UseSplitPush = STI.splitFramePushPop(); const MCPhysReg *RegList = - STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + STI.isTargetDarwin() + ? CSR_iOS_SaveList + : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); const Function *F = MF->getFunction(); if (F->getCallingConv() == CallingConv::GHC) { @@ -75,7 +75,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. - return CSR_AAPCS_SaveList; + return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. @@ -87,6 +87,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } + if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && + F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return CSR_iOS_SwiftError_SaveList; + if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS) return MF->getInfo<ARMFunctionInfo>()->isSplitCSR() ? 
CSR_iOS_CXX_TLS_PE_SaveList @@ -110,6 +114,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; + + if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && + MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return CSR_iOS_SwiftError_RegMask; + if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS) return CSR_iOS_CXX_TLS_RegMask; return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; @@ -167,9 +176,8 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::R9); // Reserve D16-D31 if the subtarget doesn't support them. if (!STI.hasVFP3() || STI.hasD16()) { - assert(ARM::D31 == ARM::D16 + 15); - for (unsigned i = 0; i != 16; ++i) - Reserved.set(ARM::D16 + i); + static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!"); + Reserved.set(ARM::D16, ARM::D31 + 1); } const TargetRegisterClass *RC = &ARM::GPRPairRegClass; for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) @@ -400,13 +408,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. -void ARMBaseRegisterInfo:: -emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, int Val, - ARMCC::CondCodes Pred, - unsigned PredReg, unsigned MIFlags) const { +void ARMBaseRegisterInfo::emitLoadConstPool( + MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val, + ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 6a9a45a65687..1eee94857e05 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -166,12 +166,12 @@ public: /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. - virtual void emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, unsigned DestReg, unsigned SubIdx, - int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0, - unsigned MIFlags = MachineInstr::NoFlags)const; + virtual void + emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, + int Val, ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags) const; /// Code Generation virtual methods... bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index a731d00883a1..71b819362404 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -211,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, // First consume all registers that would give an unaligned object. Whether // we go on stack or in regs, no-one will be using them in future. 
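The line just below this comment only swaps RoundUpToAlignment for the renamed alignTo, but the computation it performs is easy to miss; a small worked sketch with illustrative values follows (the helper is a hypothetical stand-in, not the LLVM function).

// alignTo(Align, 4) / 4 turns a byte alignment into a register-count alignment:
//   Align = 8 bytes  ->  RegAlign = 2  (aggregate must start in an even reg)
//   Align = 4 bytes  ->  RegAlign = 1  (any register will do)
static unsigned regAlignFor(unsigned AlignInBytes) {
  unsigned RoundedUp = (AlignInBytes + 3) / 4 * 4; // what alignTo(Align, 4) yields
  return RoundedUp / 4;
}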
- unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4; + unsigned RegAlign = alignTo(Align, 4) / 4; while (RegIdx % RegAlign != 0 && RegIdx < RegList.size()) State.AllocateReg(RegList[RegIdx++]); diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 847ef87c1b26..edb69581b9d3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -23,6 +23,12 @@ def CC_ARM_APCS : CallingConv<[ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is passed in R6. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -42,6 +48,12 @@ def RetCC_ARM_APCS : CallingConv<[ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[f32], CCBitConvertToType<i32>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is returned in R6. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -151,6 +163,12 @@ def CC_ARM_AAPCS : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is passed in R6. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, CCDelegateTo<CC_ARM_AAPCS_Common> @@ -161,6 +179,12 @@ def RetCC_ARM_AAPCS : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is returned in R6. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> @@ -179,6 +203,12 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is passed in R6. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // HFAs are passed in a contiguous block of registers, or on the stack CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>, @@ -194,6 +224,12 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, + + // A SwiftError is returned in R6. 
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, @@ -210,6 +246,14 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// The order of callee-saved registers needs to match the order we actually push +// them in FrameLowering, because this order is what's used by +// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame +// pointer, we use this AAPCS alternative. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + R11, R10, R9, R8, + (sequence "D%u", 15, 8))>; + // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register @@ -222,6 +266,9 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; +// R6 is used to pass swifterror, remove it from CSR. +def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>; + def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; @@ -235,10 +282,11 @@ def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), (sequence "D%u", 31, 0))>; // CSRs that are handled by prologue, epilogue. -def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>; +def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR, R12, R11, R7, R5, R4)>; // CSRs that are handled explicitly via copies. -def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>; +def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, + CSR_iOS_CXX_TLS_PE)>; // The "interrupt" attribute is used to generate code that is acceptable in // exception-handlers of various kinds. It makes us use a different return diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 55c1684028c2..8511f67dccd5 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -53,6 +53,11 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +static cl::opt<unsigned> +CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30), + cl::desc("The max number of iteration for converge")); + + /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. 
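The CPMaxIteration option introduced above feeds the placement loop changed in the next hunk. As a rough stand-alone sketch of that retry policy (the helper signature here is hypothetical; the real code is the runOnMachineFunction loop shown below): once half of the iteration budget is spent, each constant-pool user is asked to accept water closer to its own block, trading padding for guaranteed convergence.

#include <functional>

// Sketch only: mirrors the shape of the loop in the following hunk.
static bool placeUntilConverged(unsigned MaxIters, unsigned NumUsers,
                                const std::function<bool(unsigned, bool)> &HandleUser) {
  for (unsigned Iter = 0;;) {
    bool Changed = false;
    for (unsigned I = 0; I != NumUsers; ++I)
      Changed |= HandleUser(I, /*CloserWater=*/Iter >= MaxIters / 2);
    if (!Changed)
      return true;  // converged
    if (++Iter > MaxIters)
      return false; // the real pass calls report_fatal_error at this point
  }
}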
@@ -274,6 +279,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "ARM constant island placement and branch shortening pass"; } @@ -293,10 +303,10 @@ namespace { unsigned getCombinedIndex(const MachineInstr *CPEMI); int findInRangeCPEntry(CPUser& U, unsigned UserOffset); bool findAvailableWater(CPUser&U, unsigned UserOffset, - water_iterator &WaterIter); + water_iterator &WaterIter, bool CloserWater); void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool handleConstantPoolUser(unsigned CPUserIndex); + bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater); void removeDeadCPEMI(MachineInstr *CPEMI); bool removeUnusedCPEntries(); bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, @@ -456,8 +466,11 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= handleConstantPoolUser(i); - if (CPChange && ++NoCPIters > 30) + // For most inputs, it converges in no more than 5 iterations. + // If it doesn't end in 10, the input may have huge BB or many CPEs. + // In this case, we will try different heuristics. + CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2); + if (CPChange && ++NoCPIters > CPMaxIteration) report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -478,10 +491,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MadeChange = true; } - // Shrink 32-bit Thumb2 branch, load, and store instructions. + // Shrink 32-bit Thumb2 load and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) MadeChange |= optimizeThumb2Instructions(); + // Shrink 32-bit branch instructions. + if (isThumb && STI->hasV8MBaselineOps()) + MadeChange |= optimizeThumb2Branches(); + + // Optimize jump tables using TBB / TBH. + if (isThumb2) + MadeChange |= optimizeThumb2JumpTables(); + // After a while, this might be made debug-only, but it is not expensive. verify(); @@ -654,7 +675,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // have an unconditional branch for whatever reason. MachineBasicBlock *TBB, *FBB; SmallVector<MachineOperand, 4> Cond; - bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond); return TooDifficult || FBB == nullptr; } @@ -701,14 +722,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { /// information about the sizes of each block and the locations of all /// the jump tables. 
void ARMConstantIslands::scanFunctionJumpTables() { - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) - if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) - T2JumpTables.push_back(I); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &I : MBB) + if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT) + T2JumpTables.push_back(&I); } } @@ -735,22 +752,18 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { adjustBBOffsetsAfter(&MF->front()); // Now go back through the instructions and build up our data structures. - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - + for (MachineBasicBlock &MBB : *MF) { // If this block doesn't fall through into the next MBB, then this is // 'water' that a constant pool island could be placed. if (!BBHasFallthrough(&MBB)) WaterList.push_back(&MBB); - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) + for (MachineInstr &I : MBB) { + if (I.isDebugValue()) continue; - unsigned Opc = I->getOpcode(); - if (I->isBranch()) { + unsigned Opc = I.getOpcode(); + if (I.isBranch()) { bool isCond = false; unsigned Bits = 0; unsigned Scale = 1; @@ -759,7 +772,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { default: continue; // Ignore other JT branches case ARM::t2BR_JT: - T2JumpTables.push_back(I); + T2JumpTables.push_back(&I); continue; // Does not get an entry in ImmBranches case ARM::Bcc: isCond = true; @@ -793,11 +806,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Record this immediate branch. unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; - ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); + ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc)); } if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) - PushPopMIs.push_back(I); + PushPopMIs.push_back(&I); if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS || Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB || @@ -805,8 +818,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { continue; // Scan the instructions for constant pool operands. - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) { + for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op) + if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -865,15 +878,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { } // Remember that this is a user of a CP entry. - unsigned CPI = I->getOperand(op).getIndex(); - if (I->getOperand(op).isJTI()) { + unsigned CPI = I.getOperand(op).getIndex(); + if (I.getOperand(op).isJTI()) { JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size())); CPI = JumpTableEntryIndices[CPI]; } MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; - CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); + CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm)); // Increment corresponding CPEntry reference count. 
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); @@ -896,15 +909,14 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { BBI.Unalign = 0; BBI.PostAlign = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { + for (MachineInstr &I : *MBB) { BBI.Size += TII->GetInstSizeInBytes(I); // For inline asm, GetInstSizeInBytes returns a conservative estimate. // The actual size may be smaller, but still a multiple of the instr size. - if (I->isInlineAsm()) + if (I.isInlineAsm()) BBI.Unalign = isThumb ? 1 : 2; // Also consider instructions that may be shrunk later. - else if (isThumb && mayOptimizeThumb2Instruction(I)) + else if (isThumb && mayOptimizeThumb2Instruction(&I)) BBI.Unalign = 1; } @@ -929,7 +941,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { // Sum instructions before MI in MBB. for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(I); + Offset += TII->GetInstSizeInBytes(*I); } return Offset; } @@ -1108,7 +1120,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding @@ -1285,11 +1297,27 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, - water_iterator &WaterIter) { + water_iterator &WaterIter, + bool CloserWater) { if (WaterList.empty()) return false; unsigned BestGrowth = ~0u; + // The nearest water without splitting the UserBB is right after it. + // If the distance is still large (we have a big BB), then we need to split it + // if we don't converge after certain iterations. This helps the following + // situation to converge: + // BB0: + // Big BB + // BB1: + // Constant Pool + // When a CP access is out of range, BB0 may be used as water. However, + // inserting islands between BB0 and BB1 makes other accesses out of range. + MachineBasicBlock *UserBB = U.MI->getParent(); + unsigned MinNoSplitDisp = + BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); + if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2) + return false; for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; @@ -1301,6 +1329,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. + // When CloserWater is true, we try to find the lowest address after (or + // equal to) user MI's BB no matter of padding growth. 
unsigned Growth; if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || @@ -1312,8 +1342,11 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() << " Growth=" << Growth << '\n'); - // Keep looking unless it is perfect. - if (BestGrowth == 0) + if (CloserWater && WaterBB == U.MI->getParent()) + return true; + // Keep looking unless it is perfect and we're not looking for the lowest + // possible address. + if (!CloserWater && BestGrowth == 0) return true; } if (IP == B) @@ -1416,7 +1449,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // iterates at least once. BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, - UserOffset + TII->GetInstSizeInBytes(UserMI) + 1); + UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1426,11 +1459,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); MachineInstr *LastIT = nullptr; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI); Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) { + Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) { assert(MI != UserMBB->end() && "Fell off end of block"); - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) { CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. @@ -1447,7 +1480,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Remember the last IT instruction. if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + LastIT = &*MI; } --MI; @@ -1455,23 +1488,24 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Avoid splitting an IT block. if (LastIT) { unsigned PredReg = 0; - ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg); if (CC != ARMCC::AL) MI = LastIT; } // We really must not split an IT block. DEBUG(unsigned PredReg; - assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL)); + assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - NewMBB = splitBlockBeforeInstr(MI); + NewMBB = splitBlockBeforeInstr(&*MI); } /// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. 
-bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, + bool CloserWater) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1494,7 +1528,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (findAvailableWater(U, UserOffset, IP)) { + if (findAvailableWater(U, UserOffset, IP, CloserWater)) { DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; @@ -1584,7 +1618,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { CPEBB->setAlignment(0); } else // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if @@ -1728,7 +1762,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. - int delta = TII->GetInstSizeInBytes(&MBB->back()); + int delta = TII->GetInstSizeInBytes(MBB->back()); BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below @@ -1744,18 +1778,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. 
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI); MI->eraseFromParent(); adjustBBOffsetsAfter(MBB); return true; @@ -1852,8 +1886,6 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { } } - MadeChange |= optimizeThumb2Branches(); - MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } @@ -1910,7 +1942,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1928,7 +1960,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(*CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { @@ -2170,8 +2202,8 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { } } - unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(MI); + unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(*MI); MI->eraseFromParent(); int Delta = OrigSize - NewSize + DeadSize; @@ -2240,13 +2272,13 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction::iterator OldPrior = std::prev(BBi); // If the block terminator isn't analyzable, don't try to move the block - bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); + bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond); // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. - if (!B && Cond.empty() && BB != MF->begin() && - !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { + if (!B && Cond.empty() && BB != &MF->front() && + !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index c9849b2605ea..c0db001cb6f1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -50,11 +50,18 @@ const char *ARMConstantPoolValue::getModifierText() const { switch (Modifier) { // FIXME: Are these case sensitive? It'd be nice to lower-case all the // strings if that's legal. 
- case ARMCP::no_modifier: return "none"; - case ARMCP::TLSGD: return "tlsgd"; - case ARMCP::GOT_PREL: return "GOT_PREL"; - case ARMCP::GOTTPOFF: return "gottpoff"; - case ARMCP::TPOFF: return "tpoff"; + case ARMCP::no_modifier: + return "none"; + case ARMCP::TLSGD: + return "tlsgd"; + case ARMCP::GOT_PREL: + return "GOT_PREL"; + case ARMCP::GOTTPOFF: + return "gottpoff"; + case ARMCP::TPOFF: + return "tpoff"; + case ARMCP::SECREL: + return "secrel32"; } llvm_unreachable("Unknown modifier!"); } @@ -74,9 +81,9 @@ bool ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { if (ACPV->Kind == Kind && ACPV->PCAdjust == PCAdjust && - ACPV->Modifier == Modifier) { - if (ACPV->LabelId == LabelId) - return true; + ACPV->Modifier == Modifier && + ACPV->LabelId == LabelId && + ACPV->AddCurrentAddress == AddCurrentAddress) { // Two PC relative constpool entries containing the same GV address or // external symbols. FIXME: What about blockaddress? if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol) @@ -85,7 +92,7 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { return false; } -void ARMConstantPoolValue::dump() const { +LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const { errs() << " " << *this; } diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 6b18a4e52878..c07331d71dad 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -37,11 +37,12 @@ namespace ARMCP { }; enum ARMCPModifier { - no_modifier, - TLSGD, - GOT_PREL, - GOTTPOFF, - TPOFF + no_modifier, /// None + TLSGD, /// Thread Local Storage (General Dynamic Mode) + GOT_PREL, /// Global Offset Table, PC Relative + GOTTPOFF, /// Global Offset Table, Thread Pointer Offset + TPOFF, /// Thread Pointer Offset + SECREL, /// Section Relative (Windows TLS) }; } diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 56f3498e1204..56f5728ecfb8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -20,6 +20,7 @@ #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -50,6 +51,11 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "ARM pseudo instruction expansion pass"; } @@ -58,7 +64,8 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool ExpandMBB(MachineBasicBlock &MBB); void ExpandVLD(MachineBasicBlock::iterator &MBBI); void ExpandVST(MachineBasicBlock::iterator &MBBI); @@ -67,6 +74,14 @@ namespace { unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); + bool ExpandCMP_SWAP(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned LdrexOp, + unsigned StrexOp, unsigned UxtOp, + 
MachineBasicBlock::iterator &NextMBBI); + + bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); }; char ARMExpandPseudo::ID = 0; } @@ -651,7 +666,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; @@ -737,8 +752,242 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MI.eraseFromParent(); } +static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MBB->addLiveIn(*I); +} + +/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as +/// possible. This only gets used at -O0 so we don't care about efficiency of the +/// generated code. +bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned LdrexOp, unsigned StrexOp, + unsigned UxtOp, + MachineBasicBlock::iterator &NextMBBI) { + bool IsThumb = STI->isThumb(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineOperand &Dest = MI.getOperand(0); + unsigned StatusReg = MI.getOperand(1).getReg(); + MachineOperand &Addr = MI.getOperand(2); + MachineOperand &Desired = MI.getOperand(3); + MachineOperand &New = MI.getOperand(4); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + MachineFunction *MF = MBB.getParent(); + auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + MF->insert(++MBB.getIterator(), LoadCmpBB); + MF->insert(++LoadCmpBB->getIterator(), StoreBB); + MF->insert(++StoreBB->getIterator(), DoneBB); + + if (UxtOp) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg()) + .addReg(Desired.getReg(), RegState::Kill); + if (!IsThumb) + MIB.addImm(0); + AddDefaultPred(MIB); + } + + // .Lloadcmp: + // ldrex rDest, [rAddr] + // cmp rDest, rDesired + // bne .Ldone + LoadCmpBB->addLiveIn(Addr.getReg()); + LoadCmpBB->addLiveIn(Dest.getReg()); + LoadCmpBB->addLiveIn(Desired.getReg()); + addPostLoopLiveIns(LoadCmpBB, LiveRegs); + + MachineInstrBuilder MIB; + MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); + MIB.addReg(Addr.getReg()); + if (LdrexOp == ARM::t2LDREX) + MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. + AddDefaultPred(MIB); + + unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; + AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) + .addOperand(Desired)); + unsigned Bcc = IsThumb ? 
ARM::tBcc : ARM::Bcc; + BuildMI(LoadCmpBB, DL, TII->get(Bcc)) + .addMBB(DoneBB) + .addImm(ARMCC::NE) + .addReg(ARM::CPSR, RegState::Kill); + LoadCmpBB->addSuccessor(DoneBB); + LoadCmpBB->addSuccessor(StoreBB); + + // .Lstore: + // strex rStatus, rNew, [rAddr] + // cmp rStatus, #0 + // bne .Lloadcmp + StoreBB->addLiveIn(Addr.getReg()); + StoreBB->addLiveIn(New.getReg()); + addPostLoopLiveIns(StoreBB, LiveRegs); + + + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); + MIB.addOperand(New); + MIB.addOperand(Addr); + if (StrexOp == ARM::t2STREX) + MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. + AddDefaultPred(MIB); + + unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; + AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(StatusReg, RegState::Kill) + .addImm(0)); + BuildMI(StoreBB, DL, TII->get(Bcc)) + .addMBB(LoadCmpBB) + .addImm(ARMCC::NE) + .addReg(ARM::CPSR, RegState::Kill); + StoreBB->addSuccessor(LoadCmpBB); + StoreBB->addSuccessor(DoneBB); + + DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); + DoneBB->transferSuccessors(&MBB); + addPostLoopLiveIns(DoneBB, LiveRegs); + + MBB.addSuccessor(LoadCmpBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + +/// ARM's ldrexd/strexd take a consecutive register pair (represented as a +/// single GPRPair register), Thumb's take two separate registers so we need to +/// extract the subregs from the pair. +static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, + unsigned Flags, bool IsThumb, + const TargetRegisterInfo *TRI) { + if (IsThumb) { + unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); + unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); + MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead())); + MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead())); + } else + MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead())); +} + +/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop. 
+bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + bool IsThumb = STI->isThumb(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineOperand &Dest = MI.getOperand(0); + unsigned StatusReg = MI.getOperand(1).getReg(); + MachineOperand &Addr = MI.getOperand(2); + MachineOperand &Desired = MI.getOperand(3); + MachineOperand &New = MI.getOperand(4); + + unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); + unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); + unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0); + unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + MachineFunction *MF = MBB.getParent(); + auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + MF->insert(++MBB.getIterator(), LoadCmpBB); + MF->insert(++LoadCmpBB->getIterator(), StoreBB); + MF->insert(++StoreBB->getIterator(), DoneBB); + + // .Lloadcmp: + // ldrexd rDestLo, rDestHi, [rAddr] + // cmp rDestLo, rDesiredLo + // sbcs rStatus<dead>, rDestHi, rDesiredHi + // bne .Ldone + LoadCmpBB->addLiveIn(Addr.getReg()); + LoadCmpBB->addLiveIn(Dest.getReg()); + LoadCmpBB->addLiveIn(Desired.getReg()); + addPostLoopLiveIns(LoadCmpBB, LiveRegs); + + unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; + MachineInstrBuilder MIB; + MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); + addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); + MIB.addReg(Addr.getReg()); + AddDefaultPred(MIB); + + unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; + AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(DestLo, getKillRegState(Dest.isDead())) + .addReg(DesiredLo, getKillRegState(Desired.isDead()))); + + unsigned SBCrr = IsThumb ? ARM::t2SBCrr : ARM::SBCrr; + MIB = BuildMI(LoadCmpBB, DL, TII->get(SBCrr)) + .addReg(StatusReg, RegState::Define | RegState::Dead) + .addReg(DestHi, getKillRegState(Dest.isDead())) + .addReg(DesiredHi, getKillRegState(Desired.isDead())); + AddDefaultPred(MIB); + MIB.addReg(ARM::CPSR, RegState::Kill); + + unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; + BuildMI(LoadCmpBB, DL, TII->get(Bcc)) + .addMBB(DoneBB) + .addImm(ARMCC::NE) + .addReg(ARM::CPSR, RegState::Kill); + LoadCmpBB->addSuccessor(DoneBB); + LoadCmpBB->addSuccessor(StoreBB); + + // .Lstore: + // strexd rStatus, rNewLo, rNewHi, [rAddr] + // cmp rStatus, #0 + // bne .Lloadcmp + StoreBB->addLiveIn(Addr.getReg()); + StoreBB->addLiveIn(New.getReg()); + addPostLoopLiveIns(StoreBB, LiveRegs); + + unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; + MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); + addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); + MIB.addOperand(Addr); + AddDefaultPred(MIB); + + unsigned CMPri = IsThumb ? 
ARM::t2CMPri : ARM::CMPri; + AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(StatusReg, RegState::Kill) + .addImm(0)); + BuildMI(StoreBB, DL, TII->get(Bcc)) + .addMBB(LoadCmpBB) + .addImm(ARMCC::NE) + .addReg(ARM::CPSR, RegState::Kill); + StoreBB->addSuccessor(LoadCmpBB); + StoreBB->addSuccessor(DoneBB); + + DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); + DoneBB->transferSuccessors(&MBB); + addPostLoopLiveIns(DoneBB, LiveRegs); + + MBB.addSuccessor(LoadCmpBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + + bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); switch (Opcode) { @@ -784,7 +1033,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addReg(JumpTarget.getReg(), RegState::Kill); } - MachineInstr *NewMI = std::prev(MBBI); + auto NewMI = std::prev(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); @@ -1375,6 +1624,30 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; + + case ARM::CMP_SWAP_8: + if (STI->isThumb()) + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, + ARM::tUXTB, NextMBBI); + else + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, + ARM::UXTB, NextMBBI); + case ARM::CMP_SWAP_16: + if (STI->isThumb()) + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, + ARM::tUXTH, NextMBBI); + else + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, + ARM::UXTH, NextMBBI); + case ARM::CMP_SWAP_32: + if (STI->isThumb()) + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, + NextMBBI); + else + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); + + case ARM::CMP_SWAP_64: + return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); } } @@ -1384,7 +1657,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= ExpandMI(MBB, MBBI); + Modified |= ExpandMI(MBB, MBBI, NMBBI); MBBI = NMBBI; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index ff2fcfa349dc..13724da5d4f7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -22,7 +22,6 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -41,7 +40,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -110,11 +108,6 @@ class ARMFastISel final : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool 
Op1IsKill, - unsigned Op2, bool Op2IsKill); unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -164,6 +157,7 @@ class ARMFastISel final : public FastISel { // Utility routines. private: + bool isPositionIndependent() const; bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -215,7 +209,7 @@ class ARMFastISel final : public FastISel { const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags, bool useAM3); + MachineMemOperand::Flags Flags, bool useAM3); }; } // end anonymous namespace @@ -331,38 +325,6 @@ unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - // Make sure the input operands are sufficiently constrained to be legal - // for this instruction. - Op0 = constrainOperandRegClass(II, Op0, 1); - Op1 = constrainOperandRegClass(II, Op1, 2); - Op2 = constrainOperandRegClass(II, Op1, 3); - - if (II.getNumDefs() >= 1) { - AddOptionalDefs( - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill)); - } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(II.ImplicitDefs[0])); - } - return ResultReg; -} - unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -576,12 +538,15 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { return ResultReg; } +bool ARMFastISel::isPositionIndependent() const { + return TLI.isPositionIndependent(); +} + unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32 || GV->isThreadLocal()) return 0; - Reloc::Model RelocM = TM.getRelocationModel(); - bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); + bool IsIndirect = Subtarget->isGVIndirectSymbol(GV); const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); @@ -591,23 +556,20 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { bool IsThreadLocal = GVar && GVar->isThreadLocal(); if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0; + bool IsPositionIndependent = isPositionIndependent(); // Use movw+movt when possible, it avoids constant pool entries. // Non-darwin targets only support static movt relocations in FastISel. if (Subtarget->useMovt(*FuncInfo.MF) && - (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) { + (Subtarget->isTargetMachO() || !IsPositionIndependent)) { unsigned Opc; unsigned char TF = 0; if (Subtarget->isTargetMachO()) TF = ARMII::MO_NONLAZY; - switch (RelocM) { - case Reloc::PIC_: + if (IsPositionIndependent) Opc = isThumb2 ? 
ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; - break; - default: + else Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; - break; - } AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF)); } else { @@ -618,12 +580,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { Align = DL.getTypeAllocSize(GV->getType()); } - if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) + if (Subtarget->isTargetELF() && IsPositionIndependent) return ARMLowerPICELF(GV, Align, VT); // Grab index. - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : - (Subtarget->isThumb() ? 4 : 8); + unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; unsigned Id = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, ARMCP::CPValue, @@ -633,10 +594,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // Load value. MachineInstrBuilder MIB; if (isThumb2) { - unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; + unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg).addConstantPoolIndex(Idx); - if (RelocM == Reloc::PIC_) + if (IsPositionIndependent) MIB.addImm(Id); AddOptionalDefs(MIB); } else { @@ -648,7 +609,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { .addImm(0); AddOptionalDefs(MIB); - if (RelocM == Reloc::PIC_) { + if (IsPositionIndependent) { unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -912,7 +873,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags, bool useAM3) { + MachineMemOperand::Flags Flags, + bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64) @@ -931,7 +893,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, // ARM halfword load/stores and signed byte loads need an additional // operand. if (useAM3) { - signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; + int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); MIB.addImm(Imm); } else { @@ -945,7 +907,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, // ARM halfword load/stores and signed byte loads need an additional // operand. if (useAM3) { - signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; + int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); MIB.addImm(Imm); } else { @@ -1062,6 +1024,21 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { if (cast<LoadInst>(I)->isAtomic()) return false; + const Value *SV = I->getOperand(0); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(SV)) { + if (Arg->hasSwiftErrorAttr()) + return false; + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { + if (Alloca->isSwiftError()) + return false; + } + } + // Verify we have a legal type before going any further. 
MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) @@ -1177,6 +1154,21 @@ bool ARMFastISel::SelectStore(const Instruction *I) { if (cast<StoreInst>(I)->isAtomic()) return false; + const Value *PtrV = I->getOperand(1); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { + if (Arg->hasSwiftErrorAttr()) + return false; + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { + if (Alloca->isSwiftError()) + return false; + } + } + // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) @@ -1726,6 +1718,13 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { if (!isTypeLegal(Ty, VT)) return false; + // Many ABIs do not provide a libcall for standalone remainder, so we need to + // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double + // multi-reg returns, we'll have to bail out. + if (!TLI.hasStandaloneRem(VT)) { + return false; + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8; @@ -1847,6 +1846,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, } // Fallthrough case CallingConv::C: + case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && @@ -1858,6 +1858,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); } case CallingConv::ARM_AAPCS_VFP: + case CallingConv::Swift: if (!isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); // Fall through to soft float variant, variadic functions don't @@ -2083,6 +2084,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + if (TLI.supportSwiftError() && + F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) return false; @@ -2295,8 +2300,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // TODO: Avoid some calling conventions? - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + FunctionType *FTy = CS.getFunctionType(); bool isVarArg = FTy->isVarArg(); // Handle *simple* calls for now. @@ -2345,6 +2349,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, // FIXME: Only handle *easy* calls for now. if (CS.paramHasAttr(AttrInd, Attribute::InReg) || CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) || + CS.paramHasAttr(AttrInd, Attribute::SwiftError) || CS.paramHasAttr(AttrInd, Attribute::Nest) || CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; @@ -2394,22 +2400,15 @@ bool ARMFastISel::SelectCall(const Instruction *I, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); - unsigned char OpFlags = 0; - - // Add MO_PLT for global address or external symbol in the PIC relocation - // model. - if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) - OpFlags = ARMII::MO_PLT; - // ARM calls don't take a predicate, but tBL / tBLX do. 
if(isThumb2) AddDefaultPred(MIB); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, OpFlags); + MIB.addGlobalAddress(GV, 0, 0); else - MIB.addExternalSymbol(IntrMemName, OpFlags); + MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -2942,8 +2941,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT) { - bool UseGOT_PREL = - !(GV->hasHiddenVisibility() || GV->hasLocalLinkage()); + bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); LLVMContext *Context = &MF->getFunction()->getContext(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); @@ -3006,6 +3004,7 @@ bool ARMFastISel::fastLowerArguments() { case CallingConv::ARM_AAPCS_VFP: case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: + case CallingConv::Swift: break; } @@ -3019,6 +3018,8 @@ bool ARMFastISel::fastLowerArguments() { if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || + F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index c5990bb7d1fb..e8c9f610ea64 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -98,35 +98,32 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCSRestore(MachineInstr *MI, - const ARMBaseInstrInfo &TII, +static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII, const MCPhysReg *CSRegs) { // Integer spill area is handled with "pop". - if (isPopOpcode(MI->getOpcode())) { + if (isPopOpcode(MI.getOpcode())) { // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. 
- for (int i = 5, e = MI->getNumOperands(); i != e; ++i) - if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + for (int i = 5, e = MI.getNumOperands(); i != e; ++i) + if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs)) return false; return true; } - if ((MI->getOpcode() == ARM::LDR_POST_IMM || - MI->getOpcode() == ARM::LDR_POST_REG || - MI->getOpcode() == ARM::t2LDR_POST) && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && - MI->getOperand(1).getReg() == ARM::SP) + if ((MI.getOpcode() == ARM::LDR_POST_IMM || + MI.getOpcode() == ARM::LDR_POST_REG || + MI.getOpcode() == ARM::t2LDR_POST) && + isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) && + MI.getOperand(1).getReg() == ARM::SP) return true; return false; } -static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - const ARMBaseInstrInfo &TII, unsigned DestReg, - unsigned SrcReg, int NumBytes, - unsigned MIFlags = MachineInstr::NoFlags, - ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) { +static void emitRegPlusImmediate( + bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, + unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); @@ -136,7 +133,7 @@ static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, } static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, + MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, ARMCC::CondCodes Pred = ARMCC::AL, @@ -145,9 +142,9 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MIFlags, Pred, PredReg); } -static int sizeOfSPAdjustment(const MachineInstr *MI) { +static int sizeOfSPAdjustment(const MachineInstr &MI) { int RegSize; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case ARM::VSTMDDB_UPD: RegSize = 8; break; @@ -165,7 +162,7 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. 
- for (int i = MI->getNumOperands() - 1; i >= 4; --i) + for (int i = MI.getNumOperands() - 1; i >= 4; --i) count += RegSize; return count; } @@ -206,7 +203,8 @@ struct StackAdjustingInsts { } void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, - DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + const DebugLoc &dl, const ARMBaseInstrInfo &TII, + bool HasFP) { unsigned CFAOffset = 0; for (auto &Info : Insts) { if (HasFP && !Info.BeforeFPSet) @@ -235,7 +233,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, const unsigned Reg, + const DebugLoc &DL, const unsigned Reg, const unsigned Alignment, const bool MustBeSingleInstruction) { const ARMSubtarget &AST = @@ -355,7 +353,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetDarwin()) { + if (STI.splitFramePushPop()) { GPRCS2Size += 4; break; } @@ -416,7 +414,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // .cfi_offset operations will reflect that. if (DPRGapSize) { assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); - if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + if (tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize)) DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, @@ -430,7 +428,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { - DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); LastPush = MBBI++; } } @@ -485,7 +483,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) - .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP, RegState::Kill) .addReg(ARM::R4, RegState::Kill) .setMIFlags(MachineInstr::FrameSetup))); NumBytes = 0; @@ -494,7 +492,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, if (NumBytes) { // Adjust SP after all the callee-save spills. if (AFI->getNumAlignedDPRCS2Regs() == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes)) DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, @@ -522,7 +520,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // that push. 
if (HasFP) { MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); - unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, FramePtr, ARM::SP, PushSize + FramePtrOffsetInPush, @@ -559,7 +557,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetDarwin()) + if (STI.splitFramePushPop()) break; // fallthrough case ARM::R0: @@ -592,7 +590,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetDarwin()) { + if (STI.splitFramePushPop()) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -727,8 +725,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (MBBI != MBB.begin()) { do { --MBBI; - } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); - if (!isCSRestore(MBBI, TII, CSRegs)) + } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs)); + if (!isCSRestore(*MBBI, TII, CSRegs)) ++MBBI; } @@ -774,8 +772,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FramePtr)); } } else if (NumBytes && - !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. if (AFI->getDPRCalleeSavedAreaSize()) { @@ -904,33 +902,27 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.splitFramePushPop())) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - // Add the callee-saved register as live-in unless it's LR and - // @llvm.returnaddress is called. If LR is returned for - // @llvm.returnaddress then it's already added to the function and - // entry block live-in sets. - bool isKill = true; - if (Reg == ARM::LR) { - if (MF.getFrameInfo()->isReturnAddressTaken() && - MF.getRegInfo().isLiveIn(Reg)) - isKill = false; - } - - if (isKill) + bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); + if (!isLiveIn) MBB.addLiveIn(Reg); - // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} if (NoGap && LastReg && LastReg != Reg-1) break; LastReg = Reg; - Regs.push_back(std::make_pair(Reg, isKill)); + // Do not set a kill flag on values that are also marked as live-in. This + // happens with the @llvm-returnaddress intrinsic and with arguments + // passed in callee saved registers. + // Omitting the kill flags is conservatively correct even if the live-in + // is not used after all. + Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn)); } if (Regs.empty()) @@ -991,7 +983,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.splitFramePushPop())) continue; // The aligned reloads from area DPRCS2 are not inserted here. 
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1027,7 +1019,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet && MI != MBB.end()) { - MIB.copyImplicitOps(&*MI); + MIB.copyImplicitOps(*MI); MI->eraseFromParent(); } MI = MIB; @@ -1367,7 +1359,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, unsigned FnSize = 0; for (auto &MBB : MF) { for (auto &MI : MBB) - FnSize += TII.GetInstSizeInBytes(&MI); + FnSize += TII.GetInstSizeInBytes(MI); } return FnSize; } @@ -1485,6 +1477,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; + unsigned NumFPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( @@ -1539,13 +1532,22 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, CanEliminateFrame = false; } - if (!ARM::GPRRegClass.contains(Reg)) + if (!ARM::GPRRegClass.contains(Reg)) { + if (Spilled) { + if (ARM::SPRRegClass.contains(Reg)) + NumFPRSpills++; + else if (ARM::DPRRegClass.contains(Reg)) + NumFPRSpills += 2; + else if (ARM::QPRRegClass.contains(Reg)) + NumFPRSpills += 4; + } continue; + } if (Spilled) { NumGPRSpills++; - if (!STI.isTargetDarwin()) { + if (!STI.splitFramePushPop()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1567,7 +1569,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, break; } } else { - if (!STI.isTargetDarwin()) { + if (!STI.splitFramePushPop()) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1613,12 +1615,21 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? - bool BigStack = (RS && (MFI->estimateStackSize(MF) + - ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >= - estimateRSStackSizeLimit(MF, this))) || + unsigned EstimatedStackSize = + MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + if (hasFP(MF)) { + if (AFI->hasStackFrame()) + EstimatedStackSize += 4; + } else { + // If FP is not used, SP will be used to access arguments, so count the + // size of arguments into the estimation. + EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize(); + } + EstimatedStackSize += 16; // For possible paddings. + + bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); - bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); @@ -1712,6 +1723,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. 
+ assert(RS && "Register scavenging not provided"); const TargetRegisterClass *RC = &ARM::GPRRegClass; RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), @@ -1726,19 +1738,18 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } } - -void ARMFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); + MachineInstr &Old = *I; + DebugLoc dl = Old.getDebugLoc(); + unsigned Amount = Old.getOperand(0).getImm(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -1751,25 +1762,26 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, bool isARM = !AFI->isThumbFunction(); // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); + unsigned Opc = Old.getOpcode(); + int PIdx = Old.findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = + (PIdx == -1) ? ARMCC::AL + : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); + unsigned PredReg = Old.getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
- unsigned PredReg = Old->getOperand(3).getReg(); + unsigned PredReg = Old.getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); } } } - MBB.erase(I); + return MBB.erase(I); } /// Get the minimum constant for ARM that is greater than or equal to the @@ -2162,7 +2174,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( PrevStackMBB->addSuccessor(McrMBB); -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS MF.verify(); #endif } diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h index 66f4dfb6ef52..21cd78da395c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -74,7 +74,7 @@ public: bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; - void + MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 0157c0a35286..0d904ecb6296 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -50,8 +50,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && - // On A9, AGU and NEON/FPU are muxed. - !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) && + !(TII.getSubtarget().hasMuxedUnits() && LastMI->mayLoadOrStore()) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6e7edbf9fb15..20db3d39bcae 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -29,7 +29,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -44,11 +43,6 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false)); -static cl::opt<bool> -CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, - cl::desc("Check fp vmla / vmls hazard at isel time"), - cl::init(true)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -84,12 +78,11 @@ public: /// getI32Imm - Return a target constant of type i32 with the specified /// value. - inline SDValue getI32Imm(unsigned Imm, SDLoc dl) { + inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } - SDNode *Select(SDNode *N) override; - + void Select(SDNode *N) override; bool hasNoVMLxHazardUse(SDNode *N) const; bool isShifterOpProfitable(const SDValue &Shift, @@ -200,57 +193,61 @@ public: #include "ARMGenDAGISel.inc" private: - /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for - /// ARM. - SDNode *SelectARMIndexedLoad(SDNode *N); - SDNode *SelectT2IndexedLoad(SDNode *N); + /// Indexed (pre/post inc/dec) load matching code for ARM. 
+ bool tryARMIndexedLoad(SDNode *N); + bool tryT1IndexedLoad(SDNode *N); + bool tryT2IndexedLoad(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. - SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes0, const uint16_t *QOpcodes1); + void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *DOpcodes, const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. - SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes0, const uint16_t *QOpcodes1); + void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *DOpcodes, const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1); /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// load/store of D registers and Q registers. - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, - bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, const uint16_t *QOpcodes); + void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, + unsigned NumVecs, const uint16_t *DOpcodes, + const uint16_t *QOpcodes); /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs /// should be 2, 3 or 4. The opcode array specifies the instructions used /// for loading D registers. (Q registers are not supported.) - SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); + void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be /// generated to force the table registers to be consecutive. - SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); + void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); - /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); + /// Try to select SBFX/UBFX instructions for ARM. + bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); // Select special operations if node forms integer ABS pattern - SDNode *SelectABSOp(SDNode *N); + bool tryABSOp(SDNode *N); + + bool tryReadRegister(SDNode *N); + bool tryWriteRegister(SDNode *N); - SDNode *SelectReadRegister(SDNode *N); - SDNode *SelectWriteRegister(SDNode *N); + bool tryInlineAsm(SDNode *N); - SDNode *SelectInlineAsm(SDNode *N); + void SelectConcatVector(SDNode *N); - SDNode *SelectConcatVector(SDNode *N); + bool trySMLAWSMULW(SDNode *N); + + void SelectCMP_SWAP(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -269,7 +266,7 @@ private: SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); // Get the alignment operand for a NEON VLD or VST instruction. 
- SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs, + SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, bool is64BitVector); /// Returns the number of instructions required to materialize the given @@ -426,11 +423,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (OptLevel == CodeGenOpt::None) return true; - if (!CheckVMLxHazard) - return true; - - if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() && - !Subtarget->isCortexA9() && !Subtarget->isSwift()) + if (!Subtarget->hasVMLxHazards()) return true; if (!N->hasOneUse()) @@ -484,6 +477,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { if (Subtarget->isThumb()) { if (Val <= 255) return 1; // MOV if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW + if (Val <= 510) return 2; // MOV + ADDi8 if (~Val <= 255) return 2; // MOV + MVN if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL } else { @@ -548,11 +542,9 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { - BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32, - N.getOperand(0), NewMulConst) - .getNode()), - 0); + HandleSDNode Handle(N); replaceDAGValue(N.getOperand(1), NewMulConst); + BaseReg = Handle.getValue(); Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), SDLoc(N), MVT::i32); @@ -623,6 +615,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else @@ -803,6 +796,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } @@ -1070,6 +1064,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } @@ -1190,6 +1185,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, return false; // We want to select register offset instead } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else { @@ -1297,6 +1293,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) @@ -1468,15 +1465,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, //===--------------------------------------------------------------------===// /// getAL - Returns 
a ARMCC::AL immediate node. -static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) { +static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); } -SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { +bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return nullptr; + return false; EVT LoadedVT = LD->getMemoryVT(); SDValue Offset, AMOpc; @@ -1530,26 +1527,53 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, + MVT::i32, MVT::Other, Ops)); + return true; } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, + MVT::i32, MVT::Other, Ops)); + return true; } } - return nullptr; + return false; +} + +bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT LoadedVT = LD->getMemoryVT(); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD || + AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32) + return false; + + auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); + if (!COffs || COffs->getZExtValue() != 4) + return false; + + // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. + // The encoding of LDM is not how the rest of ISel expects a post-inc load to + // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after + // ISel. + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), + CurDAG->getRegister(0, MVT::i32), Chain }; + ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops)); + return true; } -SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { +bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return nullptr; + return false; EVT LoadedVT = LD->getMemoryVT(); bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1576,7 +1600,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; break; default: - return nullptr; + return false; } Match = true; } @@ -1586,11 +1610,12 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, - MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops)); + return true; } - return nullptr; + return false; } /// \brief Form a GPRPair pseudo register from a pair of GPR regs. 
@@ -1685,7 +1710,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand /// of a NEON VLD or VST instruction. The supported values depend on the /// number of registers being loaded. -SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl, +SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, bool is64BitVector) { unsigned NumRegs = NumVecs; if (!is64BitVector && NumVecs < 3) @@ -1806,17 +1831,17 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { return Opc; // If not one we handle, return it unchanged. } -SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes0, - const uint16_t *QOpcodes1) { +void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return nullptr; + return; SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1922,13 +1947,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); - if (NumVecs == 1) - return VLd; + if (NumVecs == 1) { + ReplaceNode(N, VLd); + return; + } // Extract out the subregisters. SDValue SuperReg = SDValue(VLd, 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && - ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && + ARM::qsub_3 == ARM::qsub_0 + 3, + "Unexpected subreg numbering"); unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); for (unsigned Vec = 0; Vec < NumVecs; ++Vec) ReplaceUses(SDValue(N, Vec), @@ -1936,13 +1964,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - return nullptr; + CurDAG->RemoveDeadNode(N); } -SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes0, - const uint16_t *QOpcodes1) { +void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); SDLoc dl(N); @@ -1950,7 +1978,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return nullptr; + return; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2042,7 +2070,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, // Transfer memoperands. 
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); - return VSt; + ReplaceNode(N, VSt); + return; } // Otherwise, quad registers are stored with two separate instructions, @@ -2083,13 +2112,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); - return VStB; + ReplaceNode(N, VStB); } -SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, - bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes) { +void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, + unsigned NumVecs, + const uint16_t *DOpcodes, + const uint16_t *QOpcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); SDLoc dl(N); @@ -2097,7 +2126,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return nullptr; + return; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2188,13 +2217,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, QOpcodes[OpcodeIndex]); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); - if (!IsLoad) - return VLdLn; + if (!IsLoad) { + ReplaceNode(N, VLdLn); + return; + } // Extract the subregisters. SuperReg = SDValue(VLdLn, 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && - ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && + ARM::qsub_3 == ARM::qsub_0 + 3, + "Unexpected subreg numbering"); unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) ReplaceUses(SDValue(N, Vec), @@ -2202,18 +2234,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return nullptr; + CurDAG->RemoveDeadNode(N); } -SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { +void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); SDLoc dl(N); SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) - return nullptr; + return; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2277,7 +2308,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. 
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); unsigned SubIdx = ARM::dsub_0; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) ReplaceUses(SDValue(N, Vec), @@ -2285,11 +2316,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return nullptr; + CurDAG->RemoveDeadNode(N); } -SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, - unsigned Opc) { +void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, + unsigned Opc) { assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); SDLoc dl(N); EVT VT = N->getValueType(0); @@ -2318,13 +2349,12 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG, dl)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - return CurDAG->getMachineNode(Opc, dl, VT, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); } -SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, - bool isSigned) { +bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) - return nullptr; + return false; unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) @@ -2338,7 +2368,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // The immediate is a mask of the low bits iff imm & (imm+1) == 0 if (And_imm & (And_imm + 1)) - return nullptr; + return false; unsigned Srl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, @@ -2358,7 +2388,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; } // ARM models shift instructions as MOVsi with shifter operand. 
@@ -2368,17 +2399,19 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); + return true; } SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width, dl, MVT::i32), getAL(CurDAG, dl), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; } } - return nullptr; + return false; } // Otherwise, we're looking for a shift of a shift @@ -2392,13 +2425,35 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) - return nullptr; + return false; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width, dl, MVT::i32), getAL(CurDAG, dl), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; + } + } + + // Or we are looking for a shift of an and, with a mask operand + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && + isShiftedMask_32(And_imm)) { + unsigned Srl_imm = 0; + unsigned LSB = countTrailingZeros(And_imm); + // Shift must be the same as the ands lsb + if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + unsigned MSB = 31 - countLeadingZeros(And_imm); + // Note: The width operand is encoded as width-1. + unsigned Width = MSB - LSB; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), + CurDAG->getTargetConstant(Width, dl, MVT::i32), + getAL(CurDAG, dl), Reg0 }; + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; } } @@ -2407,20 +2462,21 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned LSB = 0; if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) - return nullptr; + return false; if (LSB + Width > 32) - return nullptr; + return false; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), getAL(CurDAG, dl), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; } - return nullptr; + return false; } /// Target-specific DAG combining for ISD::XOR. @@ -2433,16 +2489,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) /// ARM instruction selection detects the latter and matches it to /// ARM::ABS or ARM::t2ABS machine node. 
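The pattern described in the comment above is the familiar branchless absolute-value idiom. A minimal sketch for 32-bit integers follows; it is illustrative only, relies on arithmetic right shift of a negative value (what the sra node denotes), and, like the hardware pattern, maps INT_MIN to itself.

static int32_t abs_reference(int32_t x) {
  int32_t y = x >> 31;   // arithmetic shift: -1 if x is negative, 0 otherwise
  return (x + y) ^ y;    // identity when y == 0, two's-complement negate when y == -1
}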
-SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ +bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ SDValue XORSrc0 = N->getOperand(0); SDValue XORSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) - return nullptr; + return false; if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) - return nullptr; + return false; SDValue ADDSrc0 = XORSrc0.getOperand(0); SDValue ADDSrc1 = XORSrc0.getOperand(1); @@ -2456,57 +2512,214 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ XType.isInteger() && SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; - return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + return true; + } + + return false; +} + +static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1, + bool Accumulate) { + // For SM*WB, we need to some form of sext. + // For SM*WT, we need to search for (sra X, 16) + // Src1 then gets set to X. + if ((SignExt.getOpcode() == ISD::SIGN_EXTEND || + SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG || + SignExt.getOpcode() == ISD::AssertSext) && + SignExt.getValueType() == MVT::i32) { + + *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; + Src1 = SignExt.getOperand(0); + return true; } - return nullptr; + if (SignExt.getOpcode() != ISD::SRA) + return false; + + ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1)); + if (!SRASrc1 || SRASrc1->getZExtValue() != 16) + return false; + + SDValue Op0 = SignExt.getOperand(0); + + // The sign extend operand for SM*WB could be generated by a shl and ashr. + if (Op0.getOpcode() == ISD::SHL) { + SDValue SHL = Op0; + ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); + if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16) + return false; + + *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; + Src1 = Op0.getOperand(0); + return true; + } + *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT; + Src1 = SignExt.getOperand(0); + return true; } -SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { +static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0, + SDValue &Src1, bool Accumulate) { + // First we look for: + // (add (or (srl ?, 16), (shl ?, 16))) + if (OR.getOpcode() != ISD::OR) + return false; + + SDValue SRL = OR.getOperand(0); + SDValue SHL = OR.getOperand(1); + + if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { + SRL = OR.getOperand(1); + SHL = OR.getOperand(0); + if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) + return false; + } + + ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); + ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); + if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 || + SHLSrc1->getZExtValue() != 16) + return false; + + // The first operands to the shifts need to be the two results from the + // same smul_lohi node. + if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || + SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) + return false; + + SDNode *SMULLOHI = SRL.getOperand(0).getNode(); + if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || + SHL.getOperand(0) != SDValue(SMULLOHI, 1)) + return false; + + // Now we have: + // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) + // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. + // For SMLAWB the 16-bit value will signed extended somehow. + // For SMLAWT only the SRA is required. 
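For context, a plain-C++ model of the 32x16 multiply these helpers try to form, assuming the usual ARM definition of SMULWB/SMULWT: multiply a 32-bit value by the bottom or top signed halfword of the other operand and keep bits [47:16] of the 48-bit product. SMLAWB/SMLAWT additionally add a 32-bit accumulator to that result, which is the Accumulate path in the matching code. The function name is illustrative only.

static int32_t smulw_reference(int32_t rn, int32_t rm, bool top_half) {
  uint32_t urm = static_cast<uint32_t>(rm);
  int16_t half = static_cast<int16_t>(top_half ? (urm >> 16) : (urm & 0xFFFFu));
  int64_t prod = static_cast<int64_t>(rn) * half;   // at most 48 significant bits
  return static_cast<int32_t>(prod >> 16);          // bits [47:16] of the product
}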
+ + // Check both sides of SMUL_LOHI + if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) { + Src0 = SMULLOHI->getOperand(1); + } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1, + Accumulate)) { + Src0 = SMULLOHI->getOperand(0); + } else { + return false; + } + return true; +} + +bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) { + SDLoc dl(N); + SDValue Src0 = N->getOperand(0); + SDValue Src1 = N->getOperand(1); + SDValue A, B; + unsigned Opc = 0; + + if (N->getOpcode() == ISD::ADD) { + if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR) + return false; + + SDValue Acc; + if (SearchSignedMulLong(Src0, &Opc, A, B, true)) { + Acc = Src1; + } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) { + Acc = Src0; + } else { + return false; + } + if (Opc == 0) + return false; + + SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops); + return true; + } else if (N->getOpcode() == ISD::OR && + SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) { + if (Opc == 0) + return false; + + SDValue Ops[] = { A, B, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32)}; + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return true; + } + return false; +} + +/// We've got special pseudo-instructions for these +void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { + unsigned Opcode; + EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); + if (MemTy == MVT::i8) + Opcode = ARM::CMP_SWAP_8; + else if (MemTy == MVT::i16) + Opcode = ARM::CMP_SWAP_16; + else if (MemTy == MVT::i32) + Opcode = ARM::CMP_SWAP_32; + else + llvm_unreachable("Unknown AtomicCmpSwap type"); + + SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), + N->getOperand(0)}; + SDNode *CmpSwap = CurDAG->getMachineNode( + Opcode, SDLoc(N), + CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); + ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); + CurDAG->RemoveDeadNode(N); +} + +void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. EVT VT = N->getValueType(0); if (!VT.is128BitVector() || N->getNumOperands() != 2) llvm_unreachable("unexpected CONCAT_VECTORS"); - return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); + ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); } -SDNode *ARMDAGToDAGISel::Select(SDNode *N) { +void ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return nullptr; // Already selected. + return; // Already selected. 
} switch (N->getOpcode()) { default: break; - case ISD::WRITE_REGISTER: { - SDNode *ResNode = SelectWriteRegister(N); - if (ResNode) - return ResNode; + case ISD::ADD: + case ISD::OR: + if (trySMLAWSMULW(N)) + return; break; - } - case ISD::READ_REGISTER: { - SDNode *ResNode = SelectReadRegister(N); - if (ResNode) - return ResNode; + case ISD::WRITE_REGISTER: + if (tryWriteRegister(N)) + return; break; - } - case ISD::INLINEASM: { - SDNode *ResNode = SelectInlineAsm(N); - if (ResNode) - return ResNode; + case ISD::READ_REGISTER: + if (tryReadRegister(N)) + return; break; - } - case ISD::XOR: { + case ISD::INLINEASM: + if (tryInlineAsm(N)) + return; + break; + case ISD::XOR: // Select special operations if XOR node forms integer ABS pattern - SDNode *ResNode = SelectABSOp(N); - if (ResNode) - return ResNode; + if (tryABSOp(N)) + return; // Other cases are autogenerated. break; - } case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); // If we can't materialize the constant we need to use a literal pool @@ -2530,11 +2743,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32), CurDAG->getEntryNode() }; - ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops); + ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops); } - ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); - return nullptr; + ReplaceNode(N, ResNode); + return; } // Other cases are autogenerated. @@ -2551,25 +2764,27 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, - CurDAG->getTargetConstant(0, dl, MVT::i32)); + CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, + CurDAG->getTargetConstant(0, dl, MVT::i32)); + return; } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + return; } } case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) - return I; + if (tryV6T2BitfieldExtractOp(N, false)) + return; break; case ISD::SIGN_EXTEND_INREG: case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) - return I; + if (tryV6T2BitfieldExtractOp(N, true)) + return; break; case ISD::MUL: if (Subtarget->isThumb1Only()) @@ -2587,11 +2802,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); + return; } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); + return; } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
@@ -2604,19 +2821,63 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); + return; } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); + CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); + return; } } } break; case ISD::AND: { // Check for unsigned bitfield extract - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) - return I; + if (tryV6T2BitfieldExtractOp(N, false)) + return; + + // If an immediate is used in an AND node, it is possible that the immediate + // can be more optimally materialized when negated. If this is the case we + // can negate the immediate and use a BIC instead. + auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { + uint32_t Imm = (uint32_t) N1C->getZExtValue(); + + // In Thumb2 mode, an AND can take a 12-bit immediate. If this + // immediate can be negated and fit in the immediate operand of + // a t2BIC, don't do any manual transform here as this can be + // handled by the generic ISel machinery. + bool PreferImmediateEncoding = + Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); + if (!PreferImmediateEncoding && + ConstantMaterializationCost(Imm) > + ConstantMaterializationCost(~Imm)) { + // The current immediate costs more to materialize than a negated + // immediate, so negate the immediate and use a BIC. + SDValue NewImm = + CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); + // If the new constant didn't exist before, reposition it in the topological + // ordering so it is just before N. Otherwise, don't touch its location. + if (NewImm->getNodeId() == -1) + CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); + + if (!Subtarget->hasThumb2()) { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), + N->getOperand(0), NewImm, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32)}; + ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); + return; + } else { + SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32)}; + ReplaceNode(N, + CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); + return; + } + } + } // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits @@ -2632,7 +2893,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (!Opc) break; SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + N1C = dyn_cast<ConstantSDNode>(N1); if (!N1C) break; if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { @@ -2649,29 +2910,34 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { dl, MVT::i32); SDValue Ops[] = { N0.getOperand(0), Imm16, getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Opc, dl, VT, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); + return; } } break; } case ARMISD::VMOVRRD: - return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, - N->getOperand(0), getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32)); + ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, + N->getOperand(0), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32))); + return; case ISD::UMUL_LOHI: { if (Subtarget->isThumb1Only()) break; if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops)); + return; } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? - ARM::UMULL : ARM::UMULLv5, - dl, MVT::i32, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode( + Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl, + MVT::i32, MVT::i32, Ops)); + return; } } case ISD::SMUL_LOHI: { @@ -2680,30 +2946,76 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops)); + return; } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? - ARM::SMULL : ARM::SMULLv5, - dl, MVT::i32, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode( + Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl, + MVT::i32, MVT::i32, Ops)); + return; } } + case ARMISD::UMAAL: { + unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3), + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); + return; + } case ARMISD::UMLAL:{ + // UMAAL is similar to UMLAL but it adds two 32-bit values to the + // 64-bit multiplication result. 
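The fold that follows relies on that definition of UMAAL: {RdHi,RdLo} = Rn * Rm + RdLo + RdHi, with every operand unsigned, which by construction never overflows 64 bits (the maximum is exactly 2^64 - 1). A small model of the value it produces, with an illustrative helper name:

static uint64_t umaal_reference(uint32_t rdlo, uint32_t rdhi, uint32_t rn, uint32_t rm) {
  // 32x32 -> 64-bit unsigned multiply, then add both 32-bit accumulators.
  return static_cast<uint64_t>(rn) * rm + rdlo + rdhi;
}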
+ if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC && + N->getOperand(3).getOpcode() == ARMISD::ADDE) { + + SDValue Addc = N->getOperand(2); + SDValue Adde = N->getOperand(3); + + if (Adde.getOperand(2).getNode() == Addc.getNode()) { + + ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); + ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); + + if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) + { + // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm + // RdLo = one operand to be added, lower 32-bits of res + // RdHi = other operand to be added, upper 32-bits of res + // Rn = first multiply operand + // Rm = second multiply operand + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + Addc.getOperand(0), Addc.getOperand(1), + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; + CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); + return; + } + } + } + if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); + return; }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? - ARM::UMLAL : ARM::UMLALv5, - dl, MVT::i32, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode( + Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl, + MVT::i32, MVT::i32, Ops)); + return; } } case ARMISD::SMLAL:{ @@ -2711,25 +3023,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); + return; }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? - ARM::SMLAL : ARM::SMLALv5, - dl, MVT::i32, MVT::i32, Ops); + ReplaceNode(N, CurDAG->getMachineNode( + Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, + MVT::i32, MVT::i32, Ops)); + return; } } case ISD::LOAD: { - SDNode *ResNode = nullptr; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - ResNode = SelectT2IndexedLoad(N); - else - ResNode = SelectARMIndexedLoad(N); - if (ResNode) - return ResNode; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) { + if (tryT2IndexedLoad(N)) + return; + } else if (Subtarget->isThumb()) { + if (tryT1IndexedLoad(N)) + return; + } else if (tryARMIndexedLoad(N)) + return; // Other cases are autogenerated. 
break; } @@ -2770,13 +3086,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); - return nullptr; + CurDAG->RemoveDeadNode(N); + return; } case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return nullptr; + default: return; case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: @@ -2790,13 +3107,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG, dl); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); + return; } case ARMISD::VUZP: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return nullptr; + default: return; case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: @@ -2810,13 +3128,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG, dl); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); + return; } case ARMISD::VTRN: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return nullptr; + default: return; case MVT::v8i8: Opc = ARM::VTRNd8; break; case MVT::v4i16: Opc = ARM::VTRNd16; break; case MVT::v2f32: @@ -2829,7 +3148,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG, dl); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); + return; } case ARMISD::BUILD_VECTOR: { EVT VecVT = N->getValueType(0); @@ -2837,55 +3157,68 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); if (EltVT == MVT::f64) { assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); - return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); + ReplaceNode( + N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); + return; } assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); - if (NumElts == 2) - return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); + if (NumElts == 2) { + ReplaceNode( + N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); + return; + } assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); - return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3)); + ReplaceNode(N, + createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3))); + return; } case ARMISD::VLD2DUP: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, ARM::VLD2DUPd32 }; - return SelectVLDDup(N, false, 2, Opcodes); + SelectVLDDup(N, false, 2, Opcodes); + return; } case ARMISD::VLD3DUP: { static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd32Pseudo }; - return SelectVLDDup(N, false, 3, Opcodes); + SelectVLDDup(N, false, 3, Opcodes); + return; } case ARMISD::VLD4DUP: { static const uint16_t Opcodes[] = { 
ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd32Pseudo }; - return SelectVLDDup(N, false, 4, Opcodes); + SelectVLDDup(N, false, 4, Opcodes); + return; } case ARMISD::VLD2DUP_UPD: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, ARM::VLD2DUPd32wb_fixed }; - return SelectVLDDup(N, true, 2, Opcodes); + SelectVLDDup(N, true, 2, Opcodes); + return; } case ARMISD::VLD3DUP_UPD: { static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd32Pseudo_UPD }; - return SelectVLDDup(N, true, 3, Opcodes); + SelectVLDDup(N, true, 3, Opcodes); + return; } case ARMISD::VLD4DUP_UPD: { static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd32Pseudo_UPD }; - return SelectVLDDup(N, true, 4, Opcodes); + SelectVLDDup(N, true, 4, Opcodes); + return; } case ARMISD::VLD1_UPD: { @@ -2897,7 +3230,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, ARM::VLD1q64wb_fixed }; - return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); + SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); + return; } case ARMISD::VLD2_UPD: { @@ -2908,7 +3242,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; - return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); + SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); + return; } case ARMISD::VLD3_UPD: { @@ -2922,7 +3257,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q32oddPseudo_UPD }; - return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; } case ARMISD::VLD4_UPD: { @@ -2936,7 +3272,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q32oddPseudo_UPD }; - return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; } case ARMISD::VLD2LN_UPD: { @@ -2945,7 +3282,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); + return; } case ARMISD::VLD3LN_UPD: { @@ -2954,7 +3292,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD3LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); + return; } case ARMISD::VLD4LN_UPD: { @@ -2963,7 +3302,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD4LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); + return; } case ARMISD::VST1_UPD: { @@ -2975,7 +3315,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1q16wb_fixed, ARM::VST1q32wb_fixed, ARM::VST1q64wb_fixed }; - return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); + SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); + return; } case ARMISD::VST2_UPD: { @@ -2986,7 
+3327,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; - return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); + SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); + return; } case ARMISD::VST3_UPD: { @@ -3000,7 +3342,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, ARM::VST3q16oddPseudo_UPD, ARM::VST3q32oddPseudo_UPD }; - return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; } case ARMISD::VST4_UPD: { @@ -3014,7 +3357,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, ARM::VST4q16oddPseudo_UPD, ARM::VST4q32oddPseudo_UPD }; - return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; } case ARMISD::VST2LN_UPD: { @@ -3023,7 +3367,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); + return; } case ARMISD::VST3LN_UPD: { @@ -3032,7 +3377,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST3LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); + return; } case ARMISD::VST4LN_UPD: { @@ -3041,7 +3387,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST4LNd32Pseudo_UPD }; static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq32Pseudo_UPD }; - return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); + return; } case ISD::INTRINSIC_VOID: @@ -3051,12 +3398,44 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { default: break; + case Intrinsic::arm_mrrc: + case Intrinsic::arm_mrrc2: { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + unsigned Opc; + + if (Subtarget->isThumb()) + Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); + else + Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); + + SmallVector<SDValue, 5> Ops; + Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ + Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ + Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ + + // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded + // instruction will always be '1111' but it is possible in assembly language to specify + // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. + if (Opc != ARM::MRRC2) { + Ops.push_back(getAL(CurDAG, dl)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + } + + Ops.push_back(Chain); + + // Writes to two registers. 
+ const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; + + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); + return; + } case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { SDLoc dl(N); SDValue Chain = N->getOperand(0); SDValue MemAddr = N->getOperand(2); - bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) @@ -3072,11 +3451,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ResTys.push_back(MVT::Other); // Place arguments in the right order. - SmallVector<SDValue, 7> Ops; - Ops.push_back(MemAddr); - Ops.push_back(getAL(CurDAG, dl)); - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); - Ops.push_back(Chain); + SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), Chain}; SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -3112,7 +3488,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(SDValue(N, 1), Result); } ReplaceUses(SDValue(N, 2), OutChain); - return nullptr; + CurDAG->RemoveDeadNode(N); + return; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { @@ -3150,7 +3527,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - return St; + ReplaceNode(N, St); + return; } case Intrinsic::arm_neon_vld1: { @@ -3158,7 +3536,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1d32, ARM::VLD1d64 }; static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64}; - return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); + SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); + return; } case Intrinsic::arm_neon_vld2: { @@ -3166,7 +3545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2d32, ARM::VLD1q64 }; static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; - return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); + SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); + return; } case Intrinsic::arm_neon_vld3: { @@ -3180,7 +3560,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, ARM::VLD3q16oddPseudo, ARM::VLD3q32oddPseudo }; - return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; } case Intrinsic::arm_neon_vld4: { @@ -3194,7 +3575,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, ARM::VLD4q16oddPseudo, ARM::VLD4q32oddPseudo }; - return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; } case Intrinsic::arm_neon_vld2lane: { @@ -3203,7 +3585,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo }; - return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); + return; } case Intrinsic::arm_neon_vld3lane: { @@ -3212,7 +3595,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD3LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo }; - return SelectVLDSTLane(N, 
true, false, 3, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); + return; } case Intrinsic::arm_neon_vld4lane: { @@ -3221,7 +3605,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD4LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo }; - return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); + SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); + return; } case Intrinsic::arm_neon_vst1: { @@ -3229,15 +3614,17 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1d32, ARM::VST1d64 }; static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64 }; - return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); + SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); + return; } case Intrinsic::arm_neon_vst2: { static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, ARM::VST2d32, ARM::VST1q64 }; - static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, - ARM::VST2q32Pseudo }; - return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); + static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; + SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); + return; } case Intrinsic::arm_neon_vst3: { @@ -3251,7 +3638,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, ARM::VST3q16oddPseudo, ARM::VST3q32oddPseudo }; - return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; } case Intrinsic::arm_neon_vst4: { @@ -3265,7 +3653,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, ARM::VST4q16oddPseudo, ARM::VST4q32oddPseudo }; - return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; } case Intrinsic::arm_neon_vst2lane: { @@ -3274,7 +3663,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo }; - return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); + return; } case Intrinsic::arm_neon_vst3lane: { @@ -3283,7 +3673,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST3LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo }; - return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); + return; } case Intrinsic::arm_neon_vst4lane: { @@ -3292,7 +3683,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST4LNd32Pseudo }; static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo }; - return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); + SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); + return; } } break; @@ -3305,18 +3697,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2); + SelectVTBL(N, false, 2, ARM::VTBL2); + return; case Intrinsic::arm_neon_vtbl3: - return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); + SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); + return; case Intrinsic::arm_neon_vtbl4: - return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); + SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); + return; case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, 
ARM::VTBX2); + SelectVTBL(N, true, 2, ARM::VTBX2); + return; case Intrinsic::arm_neon_vtbx3: - return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); + SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); + return; case Intrinsic::arm_neon_vtbx4: - return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); + SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); + return; } break; } @@ -3324,13 +3722,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VTBL1: { SDLoc dl(N); EVT VT = N->getValueType(0); - SmallVector<SDValue, 6> Ops; - - Ops.push_back(N->getOperand(0)); - Ops.push_back(N->getOperand(1)); - Ops.push_back(getAL(CurDAG, dl)); // Predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); + SDValue Ops[] = {N->getOperand(0), N->getOperand(1), + getAL(CurDAG, dl), // Predicate + CurDAG->getRegister(0, MVT::i32)}; // Predicate Register + ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); + return; } case ARMISD::VTBL2: { SDLoc dl(N); @@ -3341,19 +3737,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue V1 = N->getOperand(1); SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(2)); - Ops.push_back(getAL(CurDAG, dl)); // Predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops); + SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate + CurDAG->getRegister(0, MVT::i32)}; // Predicate Register + ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); + return; } case ISD::CONCAT_VECTORS: - return SelectConcatVector(N); + SelectConcatVector(N); + return; + + case ISD::ATOMIC_CMP_SWAP: + SelectCMP_SWAP(N); + return; } - return SelectCode(N); + SelectCode(N); } // Inspect a register string of the form @@ -3362,8 +3761,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // and obtain the integer operands from them, adding these operands to the // provided vector. static void getIntOperandsFromRegisterString(StringRef RegString, - SelectionDAG *CurDAG, SDLoc DL, - std::vector<SDValue>& Ops) { + SelectionDAG *CurDAG, + const SDLoc &DL, + std::vector<SDValue> &Ops) { SmallVector<StringRef, 5> Fields; RegString.split(Fields, ':'); @@ -3444,6 +3844,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { .Case("basepri_max", 0x12) .Case("faultmask", 0x13) .Case("control", 0x14) + .Case("msplim", 0x0a) + .Case("psplim", 0x0b) + .Case("sp", 0x18) .Default(-1); } @@ -3473,11 +3876,27 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) return -1; + if (Subtarget->has8MSecExt() && Flags.lower() == "ns") { + Flags = ""; + SYSmvalue |= 0x80; + } + + if (!Subtarget->has8MSecExt() && + (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14)) + return -1; + + if (!Subtarget->hasV8MMainlineOps() && + (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 || + SYSmvalue == 0x93)) + return -1; + // If it was a read then we won't be expecting flags and so at this point // we can return the mask. if (IsRead) { - assert (Flags.empty() && "Unexpected flags for reading M class register."); - return SYSmvalue; + if (Flags.empty()) + return SYSmvalue; + else + return -1; } // We know we are now handling a write so need to get the mask for the flags. 
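For readers unfamiliar with the M-profile encoding used here: each special register maps to a SYSm value (the table above adds msplim = 0x0a, psplim = 0x0b and sp = 0x18), and the ARMv8-M Security Extension exposes a non-secure "_ns" view of most of them by setting bit 7 of that value, which is what the SYSmvalue |= 0x80 branch implements. A sketch of that relationship; the helper name is illustrative only.

static int encodeSYSm(int baseSYSm, bool nonSecure) {
  if (baseSYSm < 0)
    return -1;                              // unrecognised register name
  return nonSecure ? (baseSYSm | 0x80) : baseSYSm;   // e.g. 0x0a -> 0x8a for msplim_ns
}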
@@ -3563,7 +3982,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { // Lower the read_register intrinsic to ARM specific DAG nodes // using the supplied metadata string to select the instruction node to use // and the registers/masks to construct as operands for the node. -SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ +bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); bool IsThumb2 = Subtarget->isThumb2(); @@ -3592,7 +4011,8 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ Ops.push_back(getAL(CurDAG, DL)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(N->getOperand(0)); - return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); + return true; } std::string SpecialReg = RegString->getString().lower(); @@ -3602,8 +4022,10 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, - DL, MVT::i32, MVT::Other, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, + DL, MVT::i32, MVT::Other, Ops)); + return true; } // The VFP registers are read by creating SelectionDAG nodes with opcodes @@ -3623,27 +4045,37 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ // If an opcode was found then we can lower the read to a VFP instruction. if (Opcode) { if (!Subtarget->hasVFP2()) - return nullptr; + return false; if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) - return nullptr; + return false; Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops); + ReplaceNode(N, + CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); + return true; } // If the target is M Class then need to validate that the register string // is an acceptable value, so check that a mask can be constructed from the // string. if (Subtarget->isMClass()) { - int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget); + StringRef Flags = "", Reg = SpecialReg; + if (Reg.endswith("_ns")) { + Flags = "ns"; + Reg = Reg.drop_back(3); + } + + int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget); if (SYSmValue == -1) - return nullptr; + return false; SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); + return true; } // Here we know the target is not M Class so we need to check if it is one @@ -3651,24 +4083,27 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ if (SpecialReg == "apsr" || SpecialReg == "cpsr") { Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL, - MVT::i32, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRS_AR : ARM::MRS, + DL, MVT::i32, MVT::Other, Ops)); + return true; } if (SpecialReg == "spsr") { Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, - DL, MVT::i32, MVT::Other, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, + MVT::i32, MVT::Other, Ops)); + return true; } - return nullptr; + return false; } // Lower the write_register intrinsic to ARM specific DAG nodes // using the supplied metadata string to select the instruction node to use // and the registers/masks to use in the nodes -SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ +bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); bool IsThumb2 = Subtarget->isThumb2(); @@ -3698,7 +4133,8 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(N->getOperand(0)); - return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); + return true; } std::string SpecialReg = RegString->getString().lower(); @@ -3707,8 +4143,10 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, - DL, MVT::Other, Ops); + ReplaceNode( + N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, + DL, MVT::Other, Ops)); + return true; } // The VFP registers are written to by creating SelectionDAG nodes with @@ -3724,16 +4162,17 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ if (Opcode) { if (!Subtarget->hasVFP2()) - return nullptr; + return false; Ops = { N->getOperand(2), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); + return true; } - SmallVector<StringRef, 5> Fields; - StringRef(SpecialReg).split(Fields, '_', 1, false); - std::string Reg = Fields[0].str(); - StringRef Flags = Fields.size() == 2 ? Fields[1] : ""; + std::pair<StringRef, StringRef> Fields; + Fields = StringRef(SpecialReg).rsplit('_'); + std::string Reg = Fields.first.str(); + StringRef Flags = Fields.second; // If the target was M Class then need to validate the special register value // and retrieve the mask for use in the instruction node. 
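The write path above now derives the flags suffix with StringRef::rsplit, which splits around the last underscore and yields the whole string with an empty remainder when no underscore is present. A small illustration, assuming that behaviour of the LLVM API; the function name is made up for the example.

#include "llvm/ADT/StringRef.h"
#include <utility>

static std::pair<llvm::StringRef, llvm::StringRef> splitRegAndFlags(llvm::StringRef S) {
  // "apsr_nzcvq" -> ("apsr", "nzcvq"); "cpsr" -> ("cpsr", "")
  return S.rsplit('_');
}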
@@ -3745,12 +4184,13 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ } int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); if (SYSmValue == -1) - return nullptr; + return false; SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), N->getOperand(2), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); + return true; } // We then check to see if a valid mask can be constructed for one of the @@ -3761,14 +4201,15 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; - return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, - DL, MVT::Other, Ops); + ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, + DL, MVT::Other, Ops)); + return true; } - return nullptr; + return false; } -SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ +bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ std::vector<SDValue> AsmNodeOperands; unsigned Flag, Kind; bool Changed = false; @@ -3823,6 +4264,17 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) IsTiedToChangedOp = OpChanged[DefIdx]; + // Memory operands to inline asm in the SelectionDAG are modeled with two + // operands: a constant of value InlineAsm::Kind_Mem followed by the input + // operand. If we get here and we have a Kind_Mem, skip the next operand (so + // it doesn't get misinterpreted), and continue. We do this here because + // it's important to update the OpChanged array correctly before moving on. + if (Kind == InlineAsm::Kind_Mem) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && Kind != InlineAsm::Kind_RegDefEarlyClobber) continue; @@ -3912,12 +4364,13 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if (Glue.getNode()) AsmNodeOperands.push_back(Glue); if (!Changed) - return nullptr; + return false; SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); New->setNodeId(-1); - return New.getNode(); + ReplaceNode(N, New.getNode()); + return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 978e99cf511e..d6e7caf98a80 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -65,6 +65,13 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +// Disabled for causing self-hosting failures once returned-attribute inference +// was enabled. +static cl::opt<bool> +EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden, + cl::desc("Directly forward this return"), + cl::init(false)); + namespace { class ARMCCState : public CCState { public: @@ -240,7 +247,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Set the correct calling convention for ARMv7k WatchOS. It's just // AAPCS_VFP for functions as simple as libcalls. 
- if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); } @@ -254,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // RTLIB if (Subtarget->isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || - Subtarget->isTargetAndroid())) { + Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; @@ -390,10 +397,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, - { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP }, - { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP }, - { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP }, - { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP }, }; for (const auto &LC : LibraryCalls) { @@ -410,17 +413,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } - // The half <-> float conversion functions are always soft-float, but are - // needed for some targets which use a hard-float calling convention by - // default. - if (Subtarget->isAAPCS_ABI()) { - setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); - } else { - setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); - setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); - setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + // The half <-> float conversion functions are always soft-float on + // non-watchos platforms, but are needed for some targets which use a + // hard-float calling convention by default. + if (!Subtarget->isTargetWatchABI()) { + if (Subtarget->isAAPCS_ABI()) { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); + } else { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + } } // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have @@ -581,6 +586,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v1i64, Expand); + setOperationAction(ISD::CTPOP, MVT::v2i64, Expand); + + setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); // NEON does not have single instruction CTTZ for vectors. setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); @@ -712,6 +722,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); } + } else { + // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. 
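In source terms, the post-increment access that the Thumb-1 comment above refers to is the load-then-advance idiom, which the backend can turn into a single LDM with base-register writeback. A minimal sketch, illustrative only:

static int32_t load_post_inc(const int32_t *&p) {
  int32_t v = *p;   // the load
  ++p;              // base written back afterwards, as in LDM r0!, {r1}
  return v;
}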
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } setOperationAction(ISD::SADDO, MVT::i32, Custom); @@ -758,10 +772,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); - // These just redirect to CTTZ and CTLZ on ARM. - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); - // @llvm.readcyclecounter requires the Performance Monitors extension. // Default to the 0 expansion on unsupported platforms. // FIXME: Technically there are older ARM CPUs that have @@ -773,19 +783,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && - !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + : Subtarget->hasDivideInARMMode(); + if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, LibCall); setOperationAction(ISD::UDIV, MVT::i32, LibCall); } + if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { + setOperationAction(ISD::SDIV, MVT::i32, Custom); + setOperationAction(ISD::UDIV, MVT::i32, Custom); + + setOperationAction(ISD::SDIV, MVT::i64, Custom); + setOperationAction(ISD::UDIV, MVT::i64, Custom); + } + setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) - if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) { + if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || + Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); + HasStandaloneRem = false; setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); @@ -807,6 +828,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } else { setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); @@ -833,21 +856,21 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. If we are targeting a single threaded system, - // then set them all for expand so we can lower them later into their - // non-atomic form. - if (TM.Options.ThreadModel == ThreadModel::Single) - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. + InsertFencesForAtomic = false; + if (Subtarget->hasAnyDataBarrier() && + (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. 
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (!Subtarget->isThumb() || !Subtarget->isMClass()) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. - if (!Subtarget->hasV8Ops()) { + if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering @@ -909,6 +932,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + // Thumb-1 cannot currently select ARMISD::SUBE. + if (!Subtarget->isThumb1Only()) + setOperationAction(ISD::SETCCE, MVT::i32, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); @@ -956,7 +983,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); } @@ -1039,7 +1066,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setMinStackArgumentAlignment(4); // Prefer likely predicted branches to selects on out-of-order cores. - PredictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -1106,7 +1133,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; - case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; @@ -1123,6 +1149,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::SSAT: return "ARMISD::SSAT"; + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1199,6 +1227,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -1373,7 +1402,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::ARM_APCS: case CallingConv::GHC: return CC; + case CallingConv::PreserveMost: + return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: + case CallingConv::Swift: return isVarArg ? 
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) @@ -1415,18 +1447,18 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); + case CallingConv::PreserveMost: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. -SDValue -ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - bool isThisReturn, SDValue ThisVal) const { +SDValue ARMTargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, + SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1442,7 +1474,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference - if (i == 0 && isThisReturn) { + if (i == 0 && isThisReturn && EnableThisRetForwarding) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); @@ -1506,23 +1538,21 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } /// LowerMemOpCallTo - Store the argument to the stack. -SDValue -ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, - SDValue StackPtr, SDValue Arg, - SDLoc dl, SelectionDAG &DAG, - const CCValAssign &VA, - ISD::ArgFlagsTy Flags) const { +SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, + SDValue Arg, const SDLoc &dl, + SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); return DAG.getStore( Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), - false, false, 0); + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } -void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, +void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, @@ -1704,7 +1734,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, false, DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); @@ -1780,20 +1809,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
bool isDirect = false; - bool isARMFunc = false; + + const TargetMachine &TM = getTargetMachine(); + const Module *Mod = MF.getFunction()->getParent(); + const GlobalValue *GV = nullptr; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + GV = G->getGlobal(); + bool isStub = + !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO(); + + bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); auto PtrVt = getPointerTy(DAG.getDataLayout()); if (Subtarget->genLongCalls()) { - assert((Subtarget->isTargetWindows() || - getTargetMachine().getRelocationModel() == Reloc::Static) && - "long-calls with non-static relocation model!"); + assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && + "long-calls codegen is not position independent!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); + if (isa<GlobalAddressSDNode>(Callee)) { // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = @@ -1804,8 +1840,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1819,54 +1854,55 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); - } - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - isDirect = true; - bool isDef = GV->isStrongDefinitionForLinker(); - bool isStub = (!isDef && Subtarget->isTargetMachO()) && - getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); - // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); - // tBX takes a register source operand. - if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode( - ARMISD::WrapperPIC, dl, PtrVt, - DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, true, 0); - } else if (Subtarget->isTargetCOFF()) { - assert(Subtarget->isTargetWindows() && - "Windows is the only supported COFF target"); - unsigned TargetFlags = GV->hasDLLImportStorageClass() - ? 
ARMII::MO_DLLIMPORT - : ARMII::MO_NO_FLAG; - Callee = - DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); - if (GV->hasDLLImportStorageClass()) + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + } + } else if (isa<GlobalAddressSDNode>(Callee)) { + // If we're optimizing for minimum size and the function is called three or + // more times in this block, we can improve codesize by calling indirectly + // as BLXr has a 16-bit encoding. + auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + auto *BB = CLI.CS->getParent(); + bool PreferIndirect = + Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) { + return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; + }) > 2; + + if (!PreferIndirect) { + isDirect = true; + bool isDef = GV->isStrongDefinitionForLinker(); + + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); + // tBX takes a register source operand. + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = - DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); - } else { - // On ELF targets for PIC code, direct calls should go through the PLT - unsigned OpFlags = 0; - if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) - OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); + /* Alignment = */ 0, MachineMemOperand::MOInvariant); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } else { + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); + } } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; - bool isStub = Subtarget->isTargetMachO() && - getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // tBX takes a register source operand. 
const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { @@ -1878,17 +1914,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { - unsigned OpFlags = 0; - // On ELF targets for PIC code, direct calls should go through the PLT - if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) - OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); } } @@ -1898,11 +1928,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else - CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + CallOpc = ARMISD::CALL; } else { if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && Subtarget->hasRAS() && + else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP @@ -2042,7 +2072,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if (!Def) return false; if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(Def, FI)) + if (!TII->isLoadFromStackSlot(*Def, FI)) return false; } else { return false; @@ -2082,9 +2112,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - const Function *CallerF = DAG.getMachineFunction().getFunction(); + MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; assert(Subtarget->supportsTailCall()); @@ -2122,41 +2152,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // If the calling conventions do not match, then we'd better make sure the - // results are returned in the same way as what the caller expects. - if (!CCMatch) { - SmallVector<CCValAssign, 16> RVLocs1; - ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, - *DAG.getContext(), Call); - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); - - SmallVector<CCValAssign, 16> RVLocs2; - ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, - *DAG.getContext(), Call); - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); - - if (RVLocs1.size() != RVLocs2.size()) + // Check that the call results are passed in the same way. + LLVMContext &C = *DAG.getContext(); + if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, + CCAssignFnForNode(CalleeCC, true, isVarArg), + CCAssignFnForNode(CallerCC, true, isVarArg))) + return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) - return false; - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) - return false; - if (RVLocs1[i].isRegLoc()) { - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) - return false; - } else { - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) - return false; - } - } } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. - const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). - getInfo<ARMFunctionInfo>(); + const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); if (AFI_Caller->getArgRegsSaveSize()) return false; @@ -2166,13 +2180,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Call); + ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC, false, isVarArg)); if (CCInfo.getNextStackOffset()) { - MachineFunction &MF = DAG.getMachineFunction(); - // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2209,6 +2220,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } } + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) + return false; } return true; @@ -2226,7 +2241,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, } static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, - SDLoc DL, SelectionDAG &DAG) { + const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); @@ -2259,11 +2274,11 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, } SDValue -ARMTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, +ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -2521,9 +2536,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; - if (RelocM == Reloc::Static) { + bool IsPositionIndependent = isPositionIndependent(); + if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -2534,11 +2549,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); - SDValue Result = - DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, 0); - if (RelocM == Reloc::Static) + SDValue Result = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + if (!IsPositionIndependent) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); @@ -2584,7 +2598,8 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SDValue FuncTLVGet = DAG.getLoad(MVT::i32, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, true, true, 4); + /* Alignment = */ 4, MachineMemOperand::MONonTemporal | + MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); MachineFunction &F = DAG.getMachineFunction(); @@ -2610,6 +2625,61 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); } +SDValue +ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); + + SDValue Chain = DAG.getEntryNode(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(Op); + + // Load the current TEB (thread environment block) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getConstant(15, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(13, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(2, DL, MVT::i32)}; + SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + + SDValue TEB = CurrentTEB.getValue(0); + Chain = CurrentTEB.getValue(1); + + // Load the ThreadLocalStoragePointer from the TEB + // A pointer to the TLS array is located at offset 0x2c from the TEB. + SDValue TLSArray = + DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); + TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); + + // The pointer to the thread's TLS data area is at the TLS Index scaled by 4 + // offset into the TLSArray. 
+ + // Load the TLS index from the C runtime + SDValue TLSIndex = + DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); + TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); + TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); + + SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, + DAG.getConstant(2, DL, MVT::i32)); + SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, + DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), + MachinePointerInfo()); + + // Get the offset of the start of the .tls section (section base) + const auto *GA = cast<GlobalAddressSDNode>(Op); + auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); + SDValue Offset = DAG.getLoad( + PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, + DAG.getTargetConstantPool(CPV, PtrVT, 4)), + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -2625,10 +2695,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, 0); + Argument = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2645,8 +2714,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), - DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), - 0); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -2680,8 +2748,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2689,8 +2756,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2700,8 +2766,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } // The address of the thread local variable is the add of the thread @@ -2714,6 +2779,9 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, 
SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); + if (Subtarget->isTargetWindows()) + return LowerGlobalTLSAddressWindows(Op, DAG); + // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); @@ -2738,9 +2806,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - bool UseGOT_PREL = - !(GV->hasHiddenVisibility() || GV->hasLocalLinkage()); + const TargetMachine &TM = getTargetMachine(); + if (isPositionIndependent()) { + bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2756,15 +2824,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Result.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); if (UseGOT_PREL) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + Result = + DAG.getLoad(PtrVT, dl, Chain, Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2781,8 +2848,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } @@ -2791,7 +2857,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; @@ -2799,15 +2864,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into multiple nodes unsigned Wrapper = - RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper; + isPositionIndependent() ? 
ARMISD::WrapperPIC : ARMISD::Wrapper; SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) + if (Subtarget->isGVIndirectSymbol(GV)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2833,8 +2897,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, TargetFlags)); if (GV->hasDLLImportStorageClass()) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2873,7 +2936,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, "RBIT intrinsic must have i32 type!"); return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); } - case Intrinsic::arm_thread_pointer: { + case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } @@ -2882,10 +2945,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; - unsigned PCAdj = (RelocM != Reloc::PIC_) - ? 0 : (Subtarget->isThumb() ? 4 : 8); + bool IsPositionIndependent = isPositionIndependent(); + unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); @@ -2893,10 +2955,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); - if (RelocM == Reloc::PIC_) { + if (IsPositionIndependent) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -2962,7 +3023,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; - } else if (Subtarget->isSwift() && Ord == Release) { + } else if (Subtarget->preferISHSTBarriers() && + Ord == AtomicOrdering::Release) { // Swift happens to implement ISHST barriers in a way that's compatible with // Release semantics but weaker than ISH so we'd be fools not to use // it. Beware: other processors probably don't! 
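// Illustrative sketch, not part of the patch above: the preceding hunk picks a
// DMB barrier domain when lowering ATOMIC_FENCE. The helper below restates
// that decision with simplified stand-in names (the real code uses ARM_MB::*
// constants, AtomicOrdering, and queries the ARMSubtarget directly).
enum class FenceDomain { SY, ISH, ISHST };

static FenceDomain pickFenceDomain(bool IsMClass, bool PreferISHSTBarriers,
                                   bool IsReleaseFence) {
  if (IsMClass)
    return FenceDomain::SY;    // M-class only has a full-system barrier.
  if (PreferISHSTBarriers && IsReleaseFence)
    return FenceDomain::ISHST; // e.g. Swift: ISHST is enough for Release.
  return FenceDomain::ISH;     // Default inner-shareable barrier.
}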
@@ -3012,13 +3074,14 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), - MachinePointerInfo(SV), false, false, 0); + MachinePointerInfo(SV)); } -SDValue -ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, + CCValAssign &NextVA, + SDValue &Root, + SelectionDAG &DAG, + const SDLoc &dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -3041,8 +3104,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad( MVT::i32, dl, Root, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -3060,13 +3122,11 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. // Return: The frame index registers were stored into. -int -ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned InRegsParamRecordIdx, - int ArgOffset, - unsigned ArgSize) const { +int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + int ArgOffset, unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; @@ -3104,9 +3164,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { unsigned VReg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(OrigArg, 4 * i), false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(OrigArg, 4 * i)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } @@ -3117,17 +3176,16 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, } // Setup stack frame, the va_list pointer will start from. 
-void -ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - unsigned ArgOffset, - unsigned TotalArgRegsSaveSize, - bool ForceMutable) const { +void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, + unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, + bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // Try to store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing + // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. @@ -3137,14 +3195,10 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, AFI->setVarArgsFrameIndex(FrameIndex); } -SDValue -ARMTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) - const { +SDValue ARMTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -3226,10 +3280,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValue2 = DAG.getLoad( - MVT::f64, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI)); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -3322,10 +3375,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad( - VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI))); } lastInsIndex = index; } @@ -3369,10 +3421,9 @@ static bool isFloatingPointZero(SDValue Op) { /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. -SDValue -ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMcc, SelectionDAG &DAG, + const SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -3428,9 +3479,8 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
-SDValue -ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const SDLoc &dl) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) @@ -3647,7 +3697,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } -SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, +SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const { if (Subtarget->isFPOnlySP() && VT == MVT::f64) { @@ -3673,14 +3723,149 @@ SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, } } +static bool isGTorGE(ISD::CondCode CC) { + return CC == ISD::SETGT || CC == ISD::SETGE; +} + +static bool isLTorLE(ISD::CondCode CC) { + return CC == ISD::SETLT || CC == ISD::SETLE; +} + +// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. +// All of these conditions (and their <= and >= counterparts) will do: +// x < k ? k : x +// x > k ? x : k +// k < x ? x : k +// k > x ? k : x +static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, + const SDValue TrueVal, const SDValue FalseVal, + const ISD::CondCode CC, const SDValue K) { + return (isGTorGE(CC) && + ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || + (isLTorLE(CC) && + ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); +} + +// Similar to isLowerSaturate(), but checks for upper-saturating conditions. +static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, + const SDValue TrueVal, const SDValue FalseVal, + const ISD::CondCode CC, const SDValue K) { + return (isGTorGE(CC) && + ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) || + (isLTorLE(CC) && + ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); +} + +// Check if two chained conditionals could be converted into SSAT. +// +// SSAT can replace a set of two conditional selectors that bound a number to an +// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: +// +// x < -k ? -k : (x > k ? k : x) +// x < -k ? -k : (x < k ? x : k) +// x > -k ? (x > k ? k : x) : -k +// x < k ? (x < -k ? -k : x) : k +// etc. +// +// It returns true if the conversion can be done, false otherwise. +// Additionally, the variable is returned in parameter V and the constant in K. +static bool isSaturatingConditional(const SDValue &Op, SDValue &V, + uint64_t &K) { + + SDValue LHS1 = Op.getOperand(0); + SDValue RHS1 = Op.getOperand(1); + SDValue TrueVal1 = Op.getOperand(2); + SDValue FalseVal1 = Op.getOperand(3); + ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + + const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1; + if (Op2.getOpcode() != ISD::SELECT_CC) + return false; + + SDValue LHS2 = Op2.getOperand(0); + SDValue RHS2 = Op2.getOperand(1); + SDValue TrueVal2 = Op2.getOperand(2); + SDValue FalseVal2 = Op2.getOperand(3); + ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); + + // Find out which are the constants and which are the variables + // in each conditional + SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1) + ? &RHS1 + : NULL; + SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2) + ? &RHS2 + : NULL; + SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? 
TrueVal2 : FalseVal2; + SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; + SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; + SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2; + + // We must detect cases where the original operations worked with 16- or + // 8-bit values. In such case, V2Tmp != V2 because the comparison operations + // must work with sign-extended values but the select operations return + // the original non-extended value. + SDValue V2TmpReg = V2Tmp; + if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG) + V2TmpReg = V2Tmp->getOperand(0); + + // Check that the registers and the constants have the correct values + // in both conditionals + if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp || + V2TmpReg != V2) + return false; + + // Figure out which conditional is saturating the lower/upper bound. + const SDValue *LowerCheckOp = + isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) + ? &Op + : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 + : NULL; + const SDValue *UpperCheckOp = + isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) + ? &Op + : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 + : NULL; + + if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) + return false; + + // Check that the constant in the lower-bound check is + // the opposite of the constant in the upper-bound check + // in 1's complement. + int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue(); + int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue(); + int64_t PosVal = std::max(Val1, Val2); + + if (((Val1 > Val2 && UpperCheckOp == &Op) || + (Val1 < Val2 && UpperCheckOp == &Op2)) && + Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { + + V = V2; + K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + return true; + } + + return false; +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc dl(Op); + + // Try to convert two saturating conditional selects into a single SSAT + SDValue SatValue; + uint64_t SatConstant; + if (isSaturatingConditional(Op, SatValue, SatConstant)) + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - SDLoc dl(Op); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, @@ -3781,10 +3966,9 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(0, SDLoc(Op), MVT::i32); if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, SDLoc(Op), - Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), Ld->getAlignment()); + return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); } @@ -3801,21 +3985,17 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { SDValue Ptr = Ld->getBasePtr(); - RetVal1 = DAG.getLoad(MVT::i32, dl, - Ld->getChain(), Ptr, - Ld->getPointerInfo(), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), Ld->getAlignment()); + 
RetVal1 = + DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), + Ld->getAlignment(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); - RetVal2 = DAG.getLoad(MVT::i32, dl, - Ld->getChain(), NewPtr, - Ld->getPointerInfo().getWithOffset(4), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), NewAlign); + RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, + Ld->getPointerInfo().getWithOffset(4), NewAlign, + Ld->getMemOperand()->getFlags()); return; } @@ -3908,8 +4088,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { - SDValue Result = OptimizeVFPBrcond(Op, DAG); - if (Result.getNode()) + if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } @@ -3950,19 +4129,17 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (isPositionIndependent()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } @@ -4156,7 +4333,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. @@ -4178,8 +4355,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); + MachinePointerInfo()); return FrameAddr; } @@ -4322,7 +4498,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. -static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! 
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); @@ -4826,12 +5002,36 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } +static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry); + + SDValue FVal = DAG.getConstant(0, DL, MVT::i32); + SDValue TVal = DAG.getConstant(1, DL, MVT::i32); + SDValue ARMcc = DAG.getConstant( + IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, + Cmp.getValue(1), SDValue()); + return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, + CCR, Chain.getValue(1)); +} + /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - SDLoc dl, EVT &VT, bool is128Bits, + const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; @@ -4979,7 +5179,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try splatting with a VMOV.f32... - APFloat FPVal = CFP->getValueAPF(); + const APFloat &FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { @@ -5421,7 +5621,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) { // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. 
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, - const ARMSubtarget *ST, SDLoc dl) { + const ARMSubtarget *ST, const SDLoc &dl) { uint64_t Val; if (!isa<ConstantSDNode>(N)) return SDValue(); @@ -5502,7 +5702,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; @@ -5585,7 +5785,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + SDValue Val = DAG.getBuildVector(VecVT, dl, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -5635,7 +5835,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue Vec = DAG.getUNDEF(VT); for (unsigned i = 0 ; i < NumElts; ++i) { SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); @@ -5681,7 +5881,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SmallVector<ShuffleSourceInfo, 2> Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { // A shuffle can only come from building a vector from various @@ -5808,7 +6008,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) + if (Entry.isUndef()) continue; auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0)); @@ -5845,7 +6045,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, ShuffleOps[i] = Sources[i].ShuffleVec; SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], - ShuffleOps[1], &Mask[0]); + ShuffleOps[1], Mask); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } @@ -5895,7 +6095,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, /// the specified operations to build the shuffle. 
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - SDLoc dl) { + const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -5982,12 +6182,12 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); - if (V2.getNode()->getOpcode() == ISD::UNDEF) + if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); + DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); + DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -6024,7 +6224,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize <= 32) { - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; @@ -6040,7 +6240,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { !isa<ConstantSDNode>(V1.getOperand(0))) { bool IsScalarToVector = true; for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } @@ -6067,8 +6267,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); - if (V2->getOpcode() == ISD::UNDEF && - isSingletonVEXTMask(ShuffleMask, VT, Imm)) { + if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) { return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, DAG.getConstant(Imm, dl, MVT::i32)); } @@ -6103,8 +6302,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // -> // concat(VZIP(v1, v2):0, :1) // - if (V1->getOpcode() == ISD::CONCAT_VECTORS && - V2->getOpcode() == ISD::UNDEF) { + if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) { SDValue SubV1 = V1->getOperand(0); SDValue SubV2 = V1->getOperand(1); EVT SubVT = SubV1.getValueType(); @@ -6175,11 +6373,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); - if (VT == MVT::v8i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); - if (NewOp.getNode()) + if (VT == MVT::v8i8) + if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG)) return NewOp; - } return SDValue(); } @@ -6218,11 +6414,11 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - if (Op0.getOpcode() != ISD::UNDEF) + if (!Op0.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), DAG.getIntPtrConstant(0, dl)); - if (Op1.getOpcode() != ISD::UNDEF) + if (!Op1.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), DAG.getIntPtrConstant(1, dl)); @@ -6351,17 +6547,16 
@@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), - LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment()); + LD->getBasePtr(), LD->getPointerInfo(), + LD->getAlignment(), LD->getMemOperand()->getFlags()); // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getAlignment(), + LD->getMemOperand()->getFlags()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, @@ -6387,8 +6582,9 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, - BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); + return DAG.getBuildVector( + MVT::v2i32, SDLoc(N), + {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)}); } // Construct a new BUILD_VECTOR with elements truncated to half the size. assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); @@ -6405,8 +6601,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(TruncVT, NumElts), Ops); + return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -6506,8 +6701,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } -static SDValue -LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { +static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, + SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? // Convert to float @@ -6528,8 +6723,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { // float4 result = as_float4(as_int4(xf*recip) + 0xb000); X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); - Y = DAG.getConstant(0xb000, dl, MVT::i32); - Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); + Y = DAG.getConstant(0xb000, dl, MVT::v4i32); X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); // Convert back to short. @@ -6538,8 +6732,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { return X; } -static SDValue -LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { +static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, + SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? 
SDValue N2; @@ -6567,8 +6761,7 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { // float4 result = as_float4(as_int4(xf*recip) + 0x89); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); - N1 = DAG.getConstant(0x89, dl, MVT::i32); - N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); + N1 = DAG.getConstant(0x89, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. @@ -6679,8 +6872,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // float4 result = as_float4(as_int4(xf*recip) + 2); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); - N1 = DAG.getConstant(2, dl, MVT::i32); - N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); + N1 = DAG.getConstant(2, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. @@ -6766,21 +6958,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) - .setCallee(CC, RetTy, Callee, std::move(Args), 0) + .setCallee(CC, RetTy, Callee, std::move(Args)) .setDiscardResult(ShouldUseSRet); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); if (!ShouldUseSRet) return CallResult.first; - SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, - MachinePointerInfo(), false, false, false, 0); + SDValue LoadSin = + DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); // Address of cos field. SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); - SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, - MachinePointerInfo(), false, false, false, 0); + SDValue LoadCos = + DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, @@ -6819,7 +7011,7 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()), - ES, std::move(Args), 0); + ES, std::move(Args)); return LowerCallTo(CLI).first; } @@ -6867,13 +7059,13 @@ void ARMTargetLowering::ExpandDIV_Windows( } static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { - // Monotonic load/store is legal for all targets - if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) - return Op; + if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering())) + // Acquire/Release load/store is not legal for targets without a dmb or + // equivalent available. + return SDValue(); - // Acquire/Release load/store is not legal for targets without a - // dmb or equivalent available. - return SDValue(); + // Monotonic load/store is legal for all targets. 
+ return Op; } static void ReplaceREADCYCLECOUNTER(SDNode *N, @@ -6899,6 +7091,46 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, Results.push_back(Cycles32.getValue(1)); } +static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { + SDLoc dl(V.getNode()); + SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32); + SDValue VHi = DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), + dl, MVT::i32); + SDValue RegClass = + DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); + SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); + SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32); + const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; + return SDValue( + DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); +} + +static void ReplaceCMP_SWAP_64Results(SDNode *N, + SmallVectorImpl<SDValue> & Results, + SelectionDAG &DAG) { + assert(N->getValueType(0) == MVT::i64 && + "AtomicCmpSwap on types less than 64 should be legal"); + SDValue Ops[] = {N->getOperand(1), + createGPRPairNode(DAG, N->getOperand(2)), + createGPRPairNode(DAG, N->getOperand(3)), + N->getOperand(0)}; + SDNode *CmpSwap = DAG.getMachineNode( + ARM::CMP_SWAP_64, SDLoc(N), + DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); + + Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32, + SDValue(CmpSwap, 0))); + Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32, + SDValue(CmpSwap, 0))); + Results.push_back(SDValue(CmpSwap, 2)); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -6948,6 +7180,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::SETCCE: return LowerSETCCE(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -6956,8 +7189,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); - case ISD::SDIV: return LowerSDIV(Op, DAG); - case ISD::UDIV: return LowerUDIV(Op, DAG); + case ISD::SDIV: + if (Subtarget->isTargetWindows()) + return LowerDIV_Windows(Op, DAG, /* Signed */ true); + return LowerSDIV(Op, DAG); + case ISD::UDIV: + if (Subtarget->isTargetWindows()) + return LowerDIV_Windows(Op, DAG, /* Signed */ false); + return LowerUDIV(Op, DAG); case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: @@ -7005,6 +7244,13 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::UREM: Res = LowerREM(N, DAG); break; + case ISD::SDIVREM: + case ISD::UDIVREM: + Res = LowerDivRem(SDValue(N, 0), DAG); + assert(Res.getNumOperands() == 2 && "DivRem needs two values"); + Results.push_back(Res.getValue(0)); + 
Results.push_back(Res.getValue(1)); + return; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -7013,6 +7259,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows"); return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV, Results); + case ISD::ATOMIC_CMP_SWAP: + ReplaceCMP_SWAP_64Results(N, Results, DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -7024,11 +7273,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. -void ARMTargetLowering:: -SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, - MachineBasicBlock *DispatchBB, int FI) const { +void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, + int FI) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); @@ -7139,10 +7389,10 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, } } -void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, +void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineFrameInfo *MFI = MF->getFrameInfo(); @@ -7182,7 +7432,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, // Get an ordered list of the machine basic blocks for the jump table. std::vector<MachineBasicBlock*> LPadList; - SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; + SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs; LPadList.reserve(CallSiteNumToLPad.size()); for (unsigned I = 1; I <= MaxCSNum; ++I) { SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; @@ -7200,7 +7450,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); unsigned MJTI = JTI->createJumpTableIndex(LPadList); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); // Create the MBBs for the dispatch code. @@ -7244,6 +7493,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, // registers being marked as clobbered. 
MIB.addRegMask(RI.getNoPreservedMask()); + bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); @@ -7357,7 +7607,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, .addMemOperand(JTMMOLd)); unsigned NewVReg6 = NewVReg5; - if (RelocM == Reloc::PIC_) { + if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) .addReg(ARM::CPSR, RegState::Define) @@ -7440,7 +7690,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, .addImm(0) .addMemOperand(JTMMOLd)); - if (RelocM == Reloc::PIC_) { + if (IsPositionIndependent) { BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) .addReg(NewVReg5, RegState::Kill) .addReg(NewVReg4) @@ -7524,7 +7774,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, (*I)->setIsEHPad(false); // The instruction is gone now. - MI->eraseFromParent(); + MI.eraseFromParent(); } static @@ -7576,8 +7826,8 @@ static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { /// Emit a post-increment load operation with given size. The instructions /// will be added to BB at Pos. -static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, - const TargetInstrInfo *TII, DebugLoc dl, +static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, + const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2) { unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); @@ -7608,8 +7858,8 @@ static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, /// Emit a post-increment store operation with given size. The instructions /// will be added to BB at Pos. -static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, - const TargetInstrInfo *TII, DebugLoc dl, +static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, + const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2) { unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); @@ -7637,7 +7887,7 @@ static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, } MachineBasicBlock * -ARMTargetLowering::EmitStructByval(MachineInstr *MI, +ARMTargetLowering::EmitStructByval(MachineInstr &MI, MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). @@ -7646,11 +7896,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI->getOperand(0).getReg(); - unsigned src = MI->getOperand(1).getReg(); - unsigned SizeVal = MI->getOperand(2).getImm(); - unsigned Align = MI->getOperand(3).getImm(); - DebugLoc dl = MI->getDebugLoc(); + unsigned dest = MI.getOperand(0).getReg(); + unsigned src = MI.getOperand(1).getReg(); + unsigned SizeVal = MI.getOperand(2).getImm(); + unsigned Align = MI.getOperand(3).getImm(); + DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -7722,7 +7972,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, srcIn = srcOut; destIn = destOut; } - MI->eraseFromParent(); // The instruction is gone now. 
+ MI.eraseFromParent(); // The instruction is gone now. return BB; } @@ -7848,7 +8098,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Add epilogue to handle BytesLeft. BB = exitMBB; - MachineInstr *StartOfExit = exitMBB->begin(); + auto StartOfExit = exitMBB->begin(); // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) @@ -7866,16 +8116,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, destIn = destOut; } - MI->eraseFromParent(); // The instruction is gone now. + MI.eraseFromParent(); // The instruction is gone now. return BB; } MachineBasicBlock * -ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, +ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); assert(Subtarget->isTargetWindows() && "__chkstk is only supported on Windows"); @@ -7930,24 +8180,26 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) - .addReg(ARM::SP).addReg(ARM::R4))); + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup))); - MI->eraseFromParent(); + MI.eraseFromParent(); return MBB; } MachineBasicBlock * -ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI, +ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, MachineBasicBlock *MBB) const { - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(); - MF->push_back(ContBB); + MF->insert(++MBB->getIterator(), ContBB); ContBB->splice(ContBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - MBB->addSuccessor(ContBB); + ContBB->transferSuccessorsAndUpdatePHIs(MBB); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); MF->push_back(TrapBB); @@ -7955,74 +8207,89 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI, MBB->addSuccessor(TrapBB); BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ)) - .addReg(MI->getOperand(0).getReg()) + .addReg(MI.getOperand(0).getReg()) .addMBB(TrapBB); + AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB)); + MBB->addSuccessor(ContBB); - MI->eraseFromParent(); + MI.eraseFromParent(); return ContBB; } MachineBasicBlock * -ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = MI.getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: { - MI->dump(); + MI.dump(); llvm_unreachable("Unexpected instr type to insert"); } + + // Thumb1 post-indexed loads are really just single-register LDMs. 
+ case ARM::tLDR_postidx: { + BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) + .addOperand(MI.getOperand(1)) // Rn_wb + .addOperand(MI.getOperand(2)) // Rn + .addOperand(MI.getOperand(3)) // PredImm + .addOperand(MI.getOperand(4)) // PredReg + .addOperand(MI.getOperand(0)); // Rt + MI.eraseFromParent(); + return BB; + } + // The Thumb2 pre-indexed stores have the same MI operands, they just // define them differently in the .td files from the isel patterns, so // they need pseudos. case ARM::t2STR_preidx: - MI->setDesc(TII->get(ARM::t2STR_PRE)); + MI.setDesc(TII->get(ARM::t2STR_PRE)); return BB; case ARM::t2STRB_preidx: - MI->setDesc(TII->get(ARM::t2STRB_PRE)); + MI.setDesc(TII->get(ARM::t2STRB_PRE)); return BB; case ARM::t2STRH_preidx: - MI->setDesc(TII->get(ARM::t2STRH_PRE)); + MI.setDesc(TII->get(ARM::t2STRH_PRE)); return BB; case ARM::STRi_preidx: case ARM::STRBi_preidx: { - unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? - ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; + unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM + : ARM::STRB_PRE_IMM; // Decode the offset. - unsigned Offset = MI->getOperand(4).getImm(); + unsigned Offset = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; Offset = ARM_AM::getAM2Offset(Offset); if (isSub) Offset = -Offset; - MachineMemOperand *MMO = *MI->memoperands_begin(); + MachineMemOperand *MMO = *MI.memoperands_begin(); BuildMI(*BB, MI, dl, TII->get(NewOpc)) - .addOperand(MI->getOperand(0)) // Rn_wb - .addOperand(MI->getOperand(1)) // Rt - .addOperand(MI->getOperand(2)) // Rn - .addImm(Offset) // offset (skip GPR==zero_reg) - .addOperand(MI->getOperand(5)) // pred - .addOperand(MI->getOperand(6)) - .addMemOperand(MMO); - MI->eraseFromParent(); + .addOperand(MI.getOperand(0)) // Rn_wb + .addOperand(MI.getOperand(1)) // Rt + .addOperand(MI.getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .addOperand(MI.getOperand(5)) // pred + .addOperand(MI.getOperand(6)) + .addMemOperand(MMO); + MI.eraseFromParent(); return BB; } case ARM::STRr_preidx: case ARM::STRBr_preidx: case ARM::STRH_preidx: { unsigned NewOpc; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); - for (unsigned i = 0; i < MI->getNumOperands(); ++i) - MIB.addOperand(MI->getOperand(i)); - MI->eraseFromParent(); + for (unsigned i = 0; i < MI.getNumOperands(); ++i) + MIB.addOperand(MI.getOperand(i)); + MI.eraseFromParent(); return BB; } @@ -8055,8 +8322,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) - .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + BuildMI(BB, dl, TII->get(ARM::tBcc)) + .addMBB(sinkMBB) + .addImm(MI.getOperand(3).getImm()) + .addReg(MI.getOperand(4).getReg()); // copy0MBB: // %FalseValue = ... @@ -8070,12 +8339,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
BB = sinkMBB; - BuildMI(*BB, BB->begin(), dl, - TII->get(ARM::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(1).getReg()) + .addMBB(copy0MBB) + .addReg(MI.getOperand(2).getReg()) + .addMBB(thisMBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. + MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8086,10 +8356,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Compare both parts that make up the double comparison separately for // equality. - bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; + bool RHSisZero = MI.getOpcode() == ARM::BCCZi64; - unsigned LHS1 = MI->getOperand(1).getReg(); - unsigned LHS2 = MI->getOperand(2).getReg(); + unsigned LHS1 = MI.getOperand(1).getReg(); + unsigned LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) @@ -8098,8 +8368,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { - unsigned RHS1 = MI->getOperand(3).getReg(); - unsigned RHS2 = MI->getOperand(4).getReg(); + unsigned RHS1 = MI.getOperand(3).getReg(); + unsigned RHS2 = MI.getOperand(4).getReg(); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS1).addReg(RHS1)); @@ -8108,9 +8378,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(ARMCC::EQ).addReg(ARM::CPSR); } - MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); + MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB(); MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); - if (MI->getOperand(0).getImm() == ARMCC::NE) + if (MI.getOperand(0).getImm() == ARMCC::NE) std::swap(destMBB, exitMBB); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -8120,7 +8390,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, else BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. + MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8157,9 +8427,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, Fn->insert(BBI, RSBBB); Fn->insert(BBI, SinkBB); - unsigned int ABSSrcReg = MI->getOperand(1).getReg(); - unsigned int ABSDstReg = MI->getOperand(0).getReg(); - bool ABSSrcKIll = MI->getOperand(1).isKill(); + unsigned int ABSSrcReg = MI.getOperand(1).getReg(); + unsigned int ABSDstReg = MI.getOperand(0).getReg(); + bool ABSSrcKIll = MI.getOperand(1).isKill(); bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or @@ -8204,7 +8474,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction - MI->eraseFromParent(); + MI.eraseFromParent(); // return last added BB return SinkBB; @@ -8223,38 +8493,38 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, /// when it is expanded into LDM/STM. This is done as a post-isel lowering /// instead of as a custom inserter because we need the use list from the SDNode. 
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, - MachineInstr *MI, const SDNode *Node) { + MachineInstr &MI, const SDNode *Node) { bool isThumb1 = Subtarget->isThumb1Only(); - DebugLoc DL = MI->getDebugLoc(); - MachineFunction *MF = MI->getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstrBuilder MIB(*MF, MI); // If the new dst/src is unused mark it as dead. if (!Node->hasAnyUseOfValue(0)) { - MI->getOperand(0).setIsDead(true); + MI.getOperand(0).setIsDead(true); } if (!Node->hasAnyUseOfValue(1)) { - MI->getOperand(1).setIsDead(true); + MI.getOperand(1).setIsDead(true); } // The MEMCPY both defines and kills the scratch registers. - for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) { + for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } } -void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, +void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { - if (MI->getOpcode() == ARM::MEMCPY) { + if (MI.getOpcode() == ARM::MEMCPY) { attachMEMCPYScratchRegs(Subtarget, MI, Node); return; } - const MCInstrDesc *MCID = &MI->getDesc(); + const MCInstrDesc *MCID = &MI.getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's @@ -8263,24 +8533,24 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). // Rename pseudo opcodes. - unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); + unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); - assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && + assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 && "converted opcode should be the same except for cc_out"); - MI->setDesc(*MCID); + MI.setDesc(*MCID); // Add the optional cc_out operand - MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); + MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); } unsigned ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. - if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { + if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } @@ -8288,14 +8558,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // since we already have an optional CPSR def. 
bool definesCPSR = false; bool deadCPSR = false; - for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); - i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e; + ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { definesCPSR = true; if (MO.isDead()) deadCPSR = true; - MI->RemoveOperand(i); + MI.RemoveOperand(i); break; } } @@ -8305,14 +8575,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, } assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); if (deadCPSR) { - assert(!MI->getOperand(ccOutIdx).getReg() && + assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); return; } // If this instruction was defined with an optional CPSR def and its dag node // had a live implicit CPSR def, then activate the optional CPSR def. - MachineOperand &MO = MI->getOperand(ccOutIdx); + MachineOperand &MO = MI.getOperand(ccOutIdx); MO.setReg(ARM::CPSR); MO.setIsDef(true); } @@ -8442,16 +8712,12 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - if (N0.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); - if (Result.getNode()) + if (N0.getNode()->hasOneUse()) + if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes)) return Result; - } - if (N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); - if (Result.getNode()) + if (N1.getNode()->hasOneUse()) + if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes)) return Result; - } return SDValue(); } @@ -8533,7 +8799,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); - + EVT inputLaneType = Vec.getValueType().getVectorElementType(); switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; @@ -8559,11 +8825,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb1Only()) return SDValue(); - - // Only perform the checks after legalize when the pattern is available. - if (DCI.isBeforeLegalize()) return SDValue(); - // Look for multiply add opportunities. // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is @@ -8691,14 +8952,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, return resNode; } +static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // UMAAL is similar to UMLAL except that it adds two unsigned values. + // While trying to combine for the other MLAL nodes, first search for the + // chance to use UMAAL. Check if Addc uses another addc node which can first + // be combined into a UMLAL. The other pattern is AddcNode being combined + // into an UMLAL and then using another addc is handled in ISelDAGToDAG. 
+ + if (!Subtarget->hasV6Ops()) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + SDNode *PrevAddc = nullptr; + if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) + PrevAddc = AddcNode->getOperand(0).getNode(); + else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) + PrevAddc = AddcNode->getOperand(1).getNode(); + + // If there's no addc chains, just return a search for any MLAL. + if (PrevAddc == nullptr) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + // Try to convert the addc operand to an MLAL and if that fails try to + // combine AddcNode. + SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); + if (MLAL != SDValue(PrevAddc, 0)) + return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + + // Find the converted UMAAL or quit if it doesn't exist. + SDNode *UmlalNode = nullptr; + SDValue AddHi; + if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) { + UmlalNode = AddcNode->getOperand(0).getNode(); + AddHi = AddcNode->getOperand(1); + } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) { + UmlalNode = AddcNode->getOperand(1).getNode(); + AddHi = AddcNode->getOperand(0); + } else { + return SDValue(); + } + + // The ADDC should be glued to an ADDE node, which uses the same UMLAL as + // the ADDC as well as Zero. + auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3)); + + if (!Zero || Zero->getZExtValue() != 0) + return SDValue(); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); + + // Look for the glued ADDE. + SDNode* AddeNode = AddcNode->getGluedUser(); + if (!AddeNode) + return SDValue(); + + if ((AddeNode->getOperand(0).getNode() == Zero && + AddeNode->getOperand(1).getNode() == UmlalNode) || + (AddeNode->getOperand(0).getNode() == UmlalNode && + AddeNode->getOperand(1).getNode() == Zero)) { + + SelectionDAG &DAG = DCI.DAG; + SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), + UmlalNode->getOperand(2), AddHi }; + SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode), + DAG.getVTList(MVT::i32, MVT::i32), Ops); + + // Replace the ADDs' nodes uses by the UMAAL node's values. + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); + + // Return original node to notify the driver to stop replacing. + return SDValue(AddcNode, 0); + } + return SDValue(); +} + /// PerformADDCCombine - Target-specific dag combine transform from -/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. +/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or +/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - return AddCombineTo64bitMLAL(N, DCI, Subtarget); + if (Subtarget->isThumb1Only()) return SDValue(); + // Only perform the checks after legalize when the pattern is available. + if (DCI.isBeforeLegalize()) return SDValue(); + + return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with @@ -8710,15 +9054,13 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, const ARMSubtarget *Subtarget){ // Attempt to create vpaddl for this add. 
- SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); - if (Result.getNode()) + if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N0, N1, DCI); - if (Result.getNode()) return Result; - } + if (N0.getNode()->hasOneUse()) + if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI)) + return Result; return SDValue(); } @@ -8731,8 +9073,7 @@ static SDValue PerformADDCombine(SDNode *N, SDValue N1 = N->getOperand(1); // First try with the default operand order. - SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); - if (Result.getNode()) + if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; // If that didn't work, try again with the operands commuted. @@ -8747,10 +9088,9 @@ static SDValue PerformSUBCombine(SDNode *N, SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DCI); - if (Result.getNode()) return Result; - } + if (N1.getNode()->hasOneUse()) + if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI)) + return Result; return SDValue(); } @@ -8920,8 +9260,7 @@ static SDValue PerformANDCombine(SDNode *N, if (!Subtarget->isThumb1Only()) { // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) - SDValue Result = combineSelectAndUseCommutative(N, true, DCI); - if (Result.getNode()) + if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; } @@ -8963,8 +9302,7 @@ static SDValue PerformORCombine(SDNode *N, if (!Subtarget->isThumb1Only()) { // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) - SDValue Result = combineSelectAndUseCommutative(N, false, DCI); - if (Result.getNode()) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } @@ -9137,8 +9475,7 @@ static SDValue PerformXORCombine(SDNode *N, if (!Subtarget->isThumb1Only()) { // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) - SDValue Result = combineSelectAndUseCommutative(N, false, DCI); - if (Result.getNode()) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } @@ -9300,17 +9637,15 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SelectionDAG &DAG = DCI.DAG; SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); - SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, - LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment()); + SDValue NewLD1 = + DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), + LD->getAlignment(), LD->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); - SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, - LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - std::min(4U, LD->getAlignment() / 2)); + SDValue NewLD2 = DAG.getLoad( + MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), + std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); if (DCI.DAG.getDataLayout().isBigEndian()) @@ -9364,11 +9699,9 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, // into a pair of GPRs, which is fine when the value is 
used as a scalar, // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. SelectionDAG &DAG = DCI.DAG; - if (N->getNumOperands() == 2) { - SDValue RV = PerformVMOVDRRCombine(N, DAG); - if (RV.getNode()) + if (N->getNumOperands() == 2) + if (SDValue RV = PerformVMOVDRRCombine(N, DAG)) return RV; - } // Load i64 elements as f64 values so that type legalization does not split // them up into i32 values. @@ -9385,7 +9718,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); + SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9434,7 +9767,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Assume only bit cast to i32 will go away. if (Elt->getOperand(0).getValueType() == MVT::i32) ++NumOfBitCastedElts; - } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) + } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt)) // Constants are statically casted, thus do not count them as // relevant operands. --NumOfRelevantElts; @@ -9461,7 +9794,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDLoc dl(N); for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { SDValue V = N->getOperand(Idx); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; if (V.getOpcode() == ISD::BITCAST && V->getOperand(0).getValueType() == MVT::i32) @@ -9529,8 +9862,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); SDValue Concat0Op1 = Op0.getOperand(1); SDValue Concat1Op1 = Op1.getOperand(1); - if (Concat0Op1.getOpcode() != ISD::UNDEF || - Concat1Op1.getOpcode() != ISD::UNDEF) + if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef()) return SDValue(); // Skip the transformation if any of the types are illegal. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -9557,7 +9889,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { NewMask.push_back(NewElt); } return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, - DAG.getUNDEF(VT), NewMask.data()); + DAG.getUNDEF(VT), NewMask); } /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, @@ -9953,7 +10285,7 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec.data()); + ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. @@ -9984,8 +10316,8 @@ static SDValue PerformSTORECombine(SDNode *N, StoreType, ShuffWide, DAG.getIntPtrConstant(I, DL)); SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); Chains.push_back(Ch); @@ -10004,18 +10336,18 @@ static SDValue PerformSTORECombine(SDNode *N, bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); - SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(isBigEndian ? 
1 : 0 ), - BasePtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + SDValue NewST1 = DAG.getStore( + St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0), + BasePtr, St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(isBigEndian ? 0 : 1), - OffsetPtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), - std::min(4U, St->getAlignment() / 2)); + OffsetPtr, St->getPointerInfo(), + std::min(4U, St->getAlignment() / 2), + St->getMemOperand()->getFlags()); } if (StVal.getValueType() == MVT::i64 && @@ -10038,9 +10370,8 @@ static SDValue PerformSTORECombine(SDNode *N, DCI.AddToWorklist(ExtElt.getNode()); DCI.AddToWorklist(V.getNode()); return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment(), - St->getAAInfo()); + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags(), St->getAAInfo()); } // If this is a legal vector store, try to combine it into a VST1_UPD. @@ -10066,7 +10397,8 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue Op = N->getOperand(0); - if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL) + if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || + Op.getOpcode() != ISD::FMUL) return SDValue(); SDValue ConstVec = Op->getOperand(1); @@ -10123,7 +10455,7 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, SDValue Op = N->getOperand(0); unsigned OpOpcode = Op.getNode()->getOpcode(); - if (!N->getValueType(0).isVector() || + if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() || (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) return SDValue(); @@ -10464,7 +10796,7 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, // The operand to BFI is already a mask suitable for removing the bits it // sets. ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - APInt Mask = CI->getAPIntValue(); + const APInt &Mask = CI->getAPIntValue(); KnownZero &= Mask; KnownOne &= Mask; return; @@ -10522,7 +10854,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D } else { assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); } - + if (Op1->getOpcode() != ISD::OR) return SDValue(); @@ -10552,7 +10884,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDLoc dl(X); EVT VT = X.getValueType(); unsigned BitInX = AndC->getAPIntValue().logBase2(); - + if (BitInX != 0) { // We must shift X first. X = DAG.getNode(ISD::SRL, dl, VT, X, @@ -10573,6 +10905,46 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D return V; } +/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. +SDValue +ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { + SDValue Cmp = N->getOperand(4); + if (Cmp.getOpcode() != ARMISD::CMPZ) + // Only looking at NE cases. 
+ return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue LHS = Cmp.getOperand(0); + SDValue RHS = Cmp.getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue BB = N->getOperand(1); + SDValue ARMcc = N->getOperand(2); + ARMCC::CondCodes CC = + (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + + // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) + // -> (brcond Chain BB CC CPSR Cmp) + if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() && + LHS->getOperand(0)->getOpcode() == ARMISD::CMOV && + LHS->getOperand(0)->hasOneUse()) { + auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0)); + auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1)); + auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + auto *RHSC = dyn_cast<ConstantSDNode>(RHS); + if ((LHS00C && LHS00C->getZExtValue() == 0) && + (LHS01C && LHS01C->getZExtValue() == 1) && + (LHS1C && LHS1C->getZExtValue() == 1) && + (RHSC && RHSC->getZExtValue() == 0)) { + return DAG.getNode( + ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2), + LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4)); + } + } + + return SDValue(); +} + /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { @@ -10626,6 +10998,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { N->getOperand(3), NewCmp); } + // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) + // -> (cmov F T CC CPSR Cmp) + if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) { + auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)); + auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + auto *RHSC = dyn_cast<ConstantSDNode>(RHS); + if ((LHS0C && LHS0C->getZExtValue() == 0) && + (LHS1C && LHS1C->getZExtValue() == 1) && + (RHSC && RHSC->getZExtValue() == 0)) { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, + LHS->getOperand(2), LHS->getOperand(3), + LHS->getOperand(4)); + } + } + if (Res.getNode()) { APInt KnownZero, KnownOne; DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); @@ -10676,6 +11063,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); + case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: @@ -11198,22 +11586,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - if (Subtarget->isThumb1Only()) - return false; - EVT VT; SDValue Ptr; - bool isSEXTLoad = false; + bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); + isNonExt = !ST->isTruncatingStore(); } else return false; + if (Subtarget->isThumb1Only()) { + // Thumb-1 can do a limited post-inc load or store as an updating LDM. It + // must be non-extending/truncating, i32, with an offset of 4. 
+ assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!"); + if (Op->getOpcode() != ISD::ADD || !isNonExt) + return false; + auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1)); + if (!RHS || RHS->getZExtValue() != 4) + return false; + + Offset = Op->getOperand(1); + Base = Op->getOperand(0); + AM = ISD::POST_INC; + return true; + } + bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) @@ -11322,6 +11725,26 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { return false; } +const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { + // At this point, we have to lower this constraint to something else, so we + // lower it to an "r" or "w". However, by doing this we will force the result + // to be in register, while the X constraint is much more permissive. + // + // Although we are correct (we are free to emit anything, without + // constraints), we might break use cases that would expect us to be more + // efficient and emit something else. + if (!Subtarget->hasVFP2()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "w"; + if (ConstraintVT.isVector() && Subtarget->hasNEON() && + (ConstraintVT.getSizeInBits() == 64 || + ConstraintVT.getSizeInBits() == 128)) + return "w"; + + return "r"; +} + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType @@ -11640,7 +12063,8 @@ static TargetLowering::ArgListTy getDivRemArgList( } SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { - assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) && + assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || + Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && @@ -11664,7 +12088,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); @@ -11702,7 +12126,7 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { // Lower call CallLoweringInfo CLI(DAG); CLI.setChain(InChain) - .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0) + .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); @@ -11950,23 +12374,20 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - switch (Ord) { - case NotAtomic: - case Unordered: + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/non-atomic"); - case Monotonic: - case Acquire: + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: return nullptr; // Nothing to do - case SequentiallyConsistent: + case AtomicOrdering::SequentiallyConsistent: if (!IsStore) return nullptr; // Nothing to do /*FALLTHROUGH*/ - case 
Release: - case AcquireRelease: - if (Subtarget->isSwift()) + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + if (Subtarget->preferISHSTBarriers()) return makeDMB(Builder, ARM_MB::ISHST); // FIXME: add a comment with a link to documentation justifying this. else @@ -11978,19 +12399,16 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - switch (Ord) { - case NotAtomic: - case Unordered: + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/not-atomic"); - case Monotonic: - case Release: + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: return nullptr; // Nothing to do - case Acquire: - case AcquireRelease: - case SequentiallyConsistent: + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitTrailingFence"); @@ -12031,7 +12449,17 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { - return true; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement cmpxchg without spilling. If the address being exchanged is also + // on the stack and close enough to the spill slot, this can lead to a + // situation where the monitor always gets cleared and the atomic operation + // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + return getTargetMachine().getOptLevel() != 0; +} + +bool ARMTargetLowering::shouldInsertFencesForAtomic( + const Instruction *I) const { + return InsertFencesForAtomic; } // This has so far only been implemented for MachO. @@ -12080,7 +12508,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); - bool IsAcquire = isAtLeastAcquire(Ord); + bool IsAcquire = isAcquireOrStronger(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a @@ -12124,7 +12552,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = isAtLeastRelease(Ord); + bool IsRelease = isReleaseOrStronger(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32". We must marshal Val into the appropriate form diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 96b56c3ec330..4906686616bc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -43,7 +43,6 @@ namespace llvm { CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. - tCALL, // Thumb function call. BRCOND, // Conditional branch. BR_JT, // Jumptable branch. BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). 
@@ -61,6 +60,8 @@ namespace llvm { CMOV, // ARM conditional move instructions. + SSAT, // Signed saturation + BCC_i64, SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. @@ -164,6 +165,7 @@ namespace llvm { UMLAL, // 64bit Unsigned Accumulate Multiply SMLAL, // 64bit Signed Accumulate Multiply + UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -251,13 +253,14 @@ namespace llvm { EVT VT) const override; MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; - void AdjustInstrPostInstrSelection(MachineInstr *MI, + void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override; SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; + SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -335,6 +338,8 @@ namespace llvm { getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; + const char *LowerXConstraint(EVT ConstraintVT) const override; + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is /// true it means one of the asm constraint of the inline asm instruction @@ -453,6 +458,7 @@ namespace llvm { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; @@ -468,6 +474,14 @@ namespace llvm { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool supportSwiftError() const override { + return true; + } + + bool hasStandaloneRem(EVT VT) const override { + return HasStandaloneRem; + } + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -486,29 +500,34 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + // TODO: remove this, and have shouldInsertFencesForAtomic do the proper + // check. 
+ bool InsertFencesForAtomic; + + bool HasStandaloneRem = true; + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; - void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, - SDValue Chain, SDValue &Arg, - RegsToPassVector &RegsToPass, + void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, + SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - SDLoc dl) const; + const SDLoc &dl) const; CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, - SDLoc dl, SelectionDAG &DAG, + const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; @@ -527,6 +546,7 @@ namespace llvm { SelectionDAG &DAG, TLSModel::Model model) const; SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; @@ -576,9 +596,9 @@ namespace llvm { SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - bool isThisReturn, SDValue ThisVal) const; + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, bool isThisReturn, + SDValue ThisVal) const; bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && @@ -590,23 +610,19 @@ namespace llvm { const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; - - int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned InRegsParamRecordIdx, - int ArgOffset, + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + + int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl, + SDValue &Chain, const Value *OrigArg, + unsigned InRegsParamRecordIdx, int ArgOffset, unsigned ArgSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - unsigned ArgOffset, - unsigned TotalArgRegsSaveSize, + const SDLoc &dl, SDValue &Chain, + unsigned ArgOffset, unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; SDValue @@ -634,42 +650,39 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const override; - 
SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; - SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal, + SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; - SDValue getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, SDLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; + SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, + const SDLoc &dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; - void SetupEntryBlockForSjLj(MachineInstr *MI, - MachineBasicBlock *MBB, + void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; - void EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const; + void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const; - bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; + bool RemapAddSubWithFlags(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitStructByval(MachineInstr *MI, + MachineBasicBlock *EmitStructByval(MachineInstr &MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI, + MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI, MachineBasicBlock *MBB) const; }; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index e79608d360ca..37a83f70a1fb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -246,23 +246,33 @@ def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> { let ParserMatchClass = shr_imm64_asm_operand; } + +// ARM Assembler operand for ldr Rd, =expression which generates an offset +// to a constant pool entry or a MOV depending on the value of expression +def const_pool_asm_operand : AsmOperandClass { let Name = "ConstPoolAsmImm"; } +def const_pool_asm_imm : Operand<i32> { + let ParserMatchClass = const_pool_asm_operand; +} + + //===----------------------------------------------------------------------===// // ARM Assembler alias templates. 
// -class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>; -class tInstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>; -class t2InstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>; -class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; -class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>; -class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; -class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> - : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>; +// Note: When EmitPriority == 1, the alias will be used for printing +class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>; +class tInstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>; +class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>; +class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>; +class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2,HasDPVFP]>; +class VFP3InstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP3]>; +class NEONInstAlias<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[HasNEON]>; class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, @@ -563,12 +573,12 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>, - Requires<[IsARM, HasV8]>; + Requires<[IsARM, HasAcquireRelease, HasV7Clrex]>; class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>, - Requires<[IsARM, HasV8]> { + Requires<[IsARM, HasAcquireRelease, HasV7Clrex]> { bits<4> Rd; let Inst{15-12} = Rd; } @@ -593,12 +603,12 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>, - Requires<[IsARM, HasV8]>; + Requires<[IsARM, HasAcquireRelease]>; class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>, - Requires<[IsARM, HasV8]> { + Requires<[IsARM, HasAcquireRelease]> { let Inst{15-12} = 0b1111; } @@ -1379,11 +1389,6 @@ class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre, let DecoderMethod = "DecodeT2LdStPre"; } -// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. -class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T]; -} - // T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode. 
class T1Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb, IsThumb1Only]; @@ -1495,6 +1500,32 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let D = VFPNeonDomain; } +class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : VFPI<oops, iops, AddrMode5, 4, IndexModeNone, + VFPLdStFrm, itin, opc, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + // VFP Load / store multiple pseudo instructions. class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, list<dag> pattern> @@ -1817,6 +1848,114 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{22} = Sd{0}; } +// Half precision, unary, predicated +class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> + : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, unary, non-predicated +class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPUnaryFrm, itin, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, binary +class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Half precision, binary, not predicated +class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPBinaryFrm, itin, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // VFP conversion instructions class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, @@ -2321,22 +2460,25 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { } // VFP/NEON Instruction aliases for type suffices. -class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : - InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>; +// Note: When EmitPriority == 1, the alias will be used for printing +class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result, bit EmitPriority = 0> : + InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasVFP2]>; -multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { - def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +// Note: When EmitPriority == 1, the alias will be used for printing +multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> { + def : VFPDataTypeInstAlias<opc, ".8", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result, EmitPriority>; } -multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> { +// Note: When EmitPriority == 1, the alias will be used for printing +multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> { let Predicates = [HasNEON] in { - def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; - def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".8", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result, EmitPriority>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result, EmitPriority>; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index cf973d68085f..98b1b4ca4272 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -90,29 +90,29 @@ unsigned 
ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, - Reloc::Model RM) const { +void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MachineFunction &MF = *MI->getParent()->getParent(); const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>(); + const TargetMachine &TM = MF.getTarget(); if (!Subtarget.useMovt(MF)) { - if (RM == Reloc::PIC_) - expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM); + if (TM.isPositionIndependent()) + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12); else - expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM); + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12); return; } - if (RM != Reloc::PIC_) { - expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM); + if (!TM.isPositionIndependent()) { + expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12); return; } const GlobalValue *GV = cast<GlobalValue>((*MI->memoperands_begin())->getValue()); - if (!Subtarget.GVIsIndirectSymbol(GV, RM)) { - expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM); + if (!Subtarget.isGVIndirectSymbol(GV)) { + expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12); return; } @@ -123,9 +123,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); - unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( - MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4); + MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); MIB.addMemOperand(MMO); MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); MIB.addReg(Reg, RegState::Kill).addImm(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h index 90f34ea08401..4b1b7097b18d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -39,8 +39,7 @@ public: const ARMRegisterInfo &getRegisterInfo() const override { return RI; } private: - void expandLoadStackGuard(MachineBasicBlock::iterator MI, - Reloc::Model RM) const override; + void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index c446ba3109e4..060376b0a273 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -90,12 +90,6 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; -def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>, - SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; -def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; -def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; - // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -128,6 +122,8 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; +def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; + def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -201,6 +197,12 @@ def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; def HasV6M : Predicate<"Subtarget->hasV6MOps()">, AssemblerPredicate<"HasV6MOps", "armv6m or armv6t2">; +def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">, + AssemblerPredicate<"HasV8MBaselineOps", + "armv8m.base">; +def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">, + AssemblerPredicate<"HasV8MMainlineOps", + "armv8m.main">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; @@ -235,6 +237,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasRAS : Predicate<"Subtarget->hasRAS()">, + AssemblerPredicate<"FeatureRAS", "ras">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, @@ -251,6 +255,12 @@ def HasDSP : Predicate<"Subtarget->hasDSP()">, def HasDB : Predicate<"Subtarget->hasDataBarrier()">, AssemblerPredicate<"FeatureDB", "data-barriers">; +def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, + AssemblerPredicate<"FeatureV7Clrex", + "v7 clrex">; +def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">, + AssemblerPredicate<"FeatureAcquireRelease", + "acquire/release">; def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; @@ -260,6 +270,9 @@ def HasVirtualization: Predicate<"false">, def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; +def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">, + AssemblerPredicate<"Feature8MSecExt", + "ARMv8-M Security Extensions">; def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; @@ -279,6 +292,8 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, def IsMachO : Predicate<"Subtarget->isTargetMachO()">; def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def IsWindows : Predicate<"Subtarget->isTargetWindows()">; +def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">; def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; @@ -301,19 +316,16 @@ def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" " Subtarget->hasVFP4()) || " "Subtarget->isTargetDarwin()">; -// VGETLNi32 is microcoded on Swift - prefer VMOV. 
-def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">; -def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">; +def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">; -// VDUP.32 is microcoded on Swift - prefer VMOV. -def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">; -def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; +def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">; +def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">; -// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as -// this allows more effective execution domain optimization. See -// setExecutionDomain(). -def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; -def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; +def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" + "!Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" + "Subtarget->useNEONForSinglePrecisionFP()">; def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; @@ -360,8 +372,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -class BinOpWithFlagFrag<dag res> : - PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; @@ -408,34 +418,35 @@ def brtarget : Operand<OtherVT> { let DecoderMethod = "DecodeT2BROperand"; } -// FIXME: get rid of this one? -def uncondbrtarget : Operand<OtherVT> { - let EncoderMethod = "getUnconditionalBranchTargetOpValue"; - let OperandType = "OPERAND_PCREL"; +// Branches targeting ARM-mode must be divisible by 4 if they're a raw +// immediate. +def ARMBranchTarget : AsmOperandClass { + let Name = "ARMBranchTarget"; } -// Branch target for ARM. Handles conditional/unconditional -def br_target : Operand<OtherVT> { - let EncoderMethod = "getARMBranchTargetOpValue"; - let OperandType = "OPERAND_PCREL"; +// Branches targeting Thumb-mode must be divisible by 2 if they're a raw +// immediate. +def ThumbBranchTarget : AsmOperandClass { + let Name = "ThumbBranchTarget"; } -// Call target. -// FIXME: rename bltarget to t2_bl_target? -def bltarget : Operand<i32> { - // Encoded the same as branch targets. - let EncoderMethod = "getBranchTargetOpValue"; +def arm_br_target : Operand<OtherVT> { + let ParserMatchClass = ARMBranchTarget; + let EncoderMethod = "getARMBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; } // Call target for ARM. Handles conditional/unconditional // FIXME: rename bl_target to t2_bltarget? -def bl_target : Operand<i32> { +def arm_bl_target : Operand<i32> { + let ParserMatchClass = ARMBranchTarget; let EncoderMethod = "getARMBLTargetOpValue"; let OperandType = "OPERAND_PCREL"; } -def blx_target : Operand<i32> { +// Target for BLX *from* ARM mode. 
+def arm_blx_target : Operand<i32> { + let ParserMatchClass = ThumbBranchTarget; let EncoderMethod = "getARMBLXTargetOpValue"; let OperandType = "OPERAND_PCREL"; } @@ -981,6 +992,21 @@ def addrmode5_pre : AddrMode5 { let PrintMethod = "printAddrMode5Operand<true>"; } +// addrmode5fp16 := reg +/- imm8*2 +// +def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; } +class AddrMode5FP16 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5FP16", []> { + let EncoderMethod = "getAddrMode5FP16OpValue"; + let DecoderMethod = "DecodeAddrMode5FP16Operand"; + let ParserMatchClass = AddrMode5FP16AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); +} + +def addrmode5fp16 : AddrMode5FP16 { + let PrintMethod = "printAddrMode5FP16Operand<false>"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } @@ -1224,7 +1250,7 @@ include "ARMInstrFormats.td" let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + SDPatternOperator opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1297,7 +1323,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + SDNode opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1369,7 +1395,7 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, /// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis, PatFrag opnode, + InstrItinClass iis, SDNode opnode, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, @@ -1402,7 +1428,7 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, /// operands are reversed. let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis, PatFrag opnode, + InstrItinClass iis, SDNode opnode, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, @@ -1431,8 +1457,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0, - string rrDecoderMethod = ""> { + SDPatternOperator opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", [(opnode GPR:$Rn, mod_imm:$imm)]>, @@ -1561,7 +1587,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc> /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, +multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode, bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), @@ -1632,7 +1658,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_rsc_irs - Define instructions and patterns for rsc let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { +multiclass AI1_rsc_irs<bits<4> opcod, string opc, SDNode opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -1880,6 +1906,7 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, bits<8> imm; let Inst{27-8} = 0b00110010000011110000; let Inst{7-0} = imm; + let DecoderMethod = "DecodeHINTInstruction"; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>; @@ -1888,6 +1915,7 @@ def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>; def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; +def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1915,7 +1943,7 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{7-4} = 0b0111; } // default immediate for breakpoint mnemonic -def : InstAlias<"bkpt", (BKPT 0)>, Requires<[IsARM]>; +def : InstAlias<"bkpt", (BKPT 0), 0>, Requires<[IsARM]>; def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { @@ -2181,7 +2209,7 @@ let isCall = 1, // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. Defs = [LR], Uses = [SP] in { - def BL : ABXI<0b1011, (outs), (ins bl_target:$func), + def BL : ABXI<0b1011, (outs), (ins arm_bl_target:$func), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM]>, Sched<[WriteBrL]> { @@ -2191,7 +2219,7 @@ let isCall = 1, let DecoderMethod = "DecodeBranchImmInstruction"; } - def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func), + def BL_pred : ABI<0b1011, (outs), (ins arm_bl_target:$func), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsARM]>, Sched<[WriteBrL]> { @@ -2232,7 +2260,7 @@ let isCall = 1, // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. - def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func), + def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, Requires<[IsARM]>, Sched<[WriteBr]>; } @@ -2240,7 +2268,7 @@ let isCall = 1, let isBranch = 1, isTerminator = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( - def Bcc : ABI<0b1010, (outs), (ins br_target:$target), + def Bcc : ABI<0b1010, (outs), (ins arm_br_target:$target), IIC_Br, "b", "\t$target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, Sched<[WriteBr]> { @@ -2255,8 +2283,9 @@ let isBranch = 1, isTerminator = 1 in { // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly // should be sufficient. // FIXME: Is B really a Barrier? That doesn't seem right. - def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br, - [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, + def B : ARMPseudoExpand<(outs), (ins arm_br_target:$target), 4, IIC_Br, + [(br bb:$target)], (Bcc arm_br_target:$target, + (ops 14, zero_reg))>, Sched<[WriteBr]>; let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { @@ -2283,7 +2312,7 @@ let isBranch = 1, isTerminator = 1 in { } // BLX (immediate) -def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, +def BLXi : AXI<(outs), (ins arm_blx_target:$target), BrMiscFrm, NoItinerary, "blx\t$target", []>, Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { let Inst{31-25} = 0b1111101; @@ -2313,9 +2342,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, Sched<[WriteBr]>; - def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst), + def TAILJMPd : ARMPseudoExpand<(outs), (ins arm_br_target:$dst), 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, + (Bcc arm_br_target:$dst, (ops 14, zero_reg))>, Requires<[IsARM]>, Sched<[WriteBr]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), @@ -2467,14 +2496,12 @@ def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>, // Load -defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, - UnOpFrag<(load node:$Src)>>; +defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, load>; defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, - UnOpFrag<(zextloadi8 node:$Src)>>; -defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, - BinOpFrag<(store node:$LHS, node:$RHS)>>; + zextloadi8>; +defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, store>; defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, - BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + truncstorei8>; // Special LDR for loads from non-pc-relative constpools. 
let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0, @@ -2764,6 +2791,12 @@ def LDRBT_POST : ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), (outs GPR:$Rt)>; +// Pseudo instruction ldr Rt, =immediate +def LDRConstPool + : ARMAsmPseudo<"ldr${q} $Rt, $immediate", + (ins const_pool_asm_imm:$immediate, pred:$q), + (outs GPR:$Rt)>; + // Store // Stores with truncate @@ -3299,8 +3332,8 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), } def : InstAlias<"mov${p} $Rd, $imm", - (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>, - Requires<[IsARM]>; + (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>, + Requires<[IsARM, HasV6T2]>; def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, @@ -3439,11 +3472,9 @@ def UBFX : I<(outs GPRnopc:$Rd), // defm ADD : AsI1_bin_irs<0b0100, "add", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; + IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>>; + IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; // ADD and SUB with 's' bit set. // @@ -3455,27 +3486,21 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", // FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen // support for an optional CPSR definition that corresponds to the DAG // node's second value. We can then eliminate the implicit def of CPSR. -defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; -defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>; +defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>; -defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; -defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; +defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>; +defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>; defm RSB : AsI1_rbin_irs<0b0011, "rsb", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>>; + sub>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. -defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>; -defm RSC : AI1_rsc_irs<0b0111, "rsc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; +defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -3685,20 +3710,19 @@ def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos), (SSAT imm1_32:$pos, GPRnopc:$a, 0)>; def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; +def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), + (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. 
// defm AND : AsI1_bin_irs<0b0000, "and", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsr, and, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsr, or, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", - IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsr, xor, 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, BinOpFrag<(and node:$LHS, (not node:$RHS))>>; @@ -3923,9 +3947,10 @@ def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; @@ -3989,28 +4014,28 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; -multiclass AI_smul<string opc, PatFrag opnode> { +multiclass AI_smul<string opc> { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16), + [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, Requires<[IsARM, HasV5TE]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16), + [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, Requires<[IsARM, HasV5TE]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)), + [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sext_inreg GPR:$Rm, i16)))]>, Requires<[IsARM, HasV5TE]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)), + [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sra GPR:$Rm, (i32 16))))]>, Requires<[IsARM, HasV5TE]>; @@ -4026,13 +4051,13 @@ multiclass AI_smul<string opc, PatFrag opnode> { } -multiclass AI_smla<string opc, PatFrag opnode> { +multiclass AI_smla<string opc> { let DecoderMethod = "DecodeSMLAInstruction" in { def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, - (opnode (sext_inreg GPRnopc:$Rn, i16), + (mul (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>; @@ -4040,7 +4065,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, - (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), + (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>; @@ -4048,7 +4073,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, 
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, - (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>; @@ -4056,7 +4081,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, - (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>; @@ -4074,8 +4099,8 @@ multiclass AI_smla<string opc, PatFrag opnode> { } } -defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm SMUL : AI_smul<"smul">; +defm SMLA : AI_smla<"smla">; // Halfword multiply accumulate long: SMLAL<x><y>. def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), @@ -4336,8 +4361,7 @@ def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", // defm CMP : AI1_cmp_irs<0b1010, "cmp", - IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, ARMcmp>; // ARMcmpZ can re-use the above instruction definitions. def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm), @@ -4745,7 +4769,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return isAtLeastAcquire(Ordering); + return isAcquireOrStronger(Ordering); }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -4755,7 +4779,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return isAtLeastRelease(Ordering); + return isReleaseOrStronger(Ordering); }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -4831,21 +4855,21 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, } class ACI<dag oops, dag iops, string opc, string asm, - IndexMode im = IndexModeNone> + list<dag> pattern, IndexMode im = IndexModeNone> : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, - opc, asm, "", []> { + opc, asm, "", pattern> { let Inst{27-25} = 0b110; } class ACInoP<dag oops, dag iops, string opc, string asm, - IndexMode im = IndexModeNone> + list<dag> pattern, IndexMode im = IndexModeNone> : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, - opc, asm, "", []> { + opc, asm, "", pattern> { let Inst{31-28} = 0b1111; let Inst{27-25} = 0b110; } -multiclass LdStCop<bit load, bit Dbit, string asm> { +multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> { def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), - asm, "\t$cop, $CRd, $addr"> { + asm, "\t$cop, $CRd, $addr", pattern> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -4861,7 +4885,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), - asm, "\t$cop, $CRd, $addr!", IndexModePre> { + asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -4878,7 +4902,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { } def 
_POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, postidx_imm8s4:$offset), - asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { bits<9> offset; bits<4> addr; bits<4> cop; @@ -4897,7 +4921,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { def _OPTION : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, coproc_option_imm:$option), - asm, "\t$cop, $CRd, $addr, $option"> { + asm, "\t$cop, $CRd, $addr, $option", []> { bits<8> option; bits<4> addr; bits<4> cop; @@ -4914,9 +4938,9 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } } -multiclass LdSt2Cop<bit load, bit Dbit, string asm> { +multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> { def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), - asm, "\t$cop, $CRd, $addr"> { + asm, "\t$cop, $CRd, $addr", pattern> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -4932,7 +4956,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), - asm, "\t$cop, $CRd, $addr!", IndexModePre> { + asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -4949,7 +4973,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { } def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, postidx_imm8s4:$offset), - asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { bits<9> offset; bits<4> addr; bits<4> cop; @@ -4968,7 +4992,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { def _OPTION : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, coproc_option_imm:$option), - asm, "\t$cop, $CRd, $addr, $option"> { + asm, "\t$cop, $CRd, $addr, $option", []> { bits<8> option; bits<4> addr; bits<4> cop; @@ -4986,14 +5010,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { } } -defm LDC : LdStCop <1, 0, "ldc">; -defm LDCL : LdStCop <1, 1, "ldcl">; -defm STC : LdStCop <0, 0, "stc">; -defm STCL : LdStCop <0, 1, "stcl">; -defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>; -defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>; +defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; + +defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. 
@@ -5118,9 +5143,9 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, (outs GPRnopc:$Rt, GPRnopc:$Rt2), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; -class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> - : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, +class MovRRCopro2<string opc, bit direction, dag oops, dag iops, + list<dag> pattern = []> + : ABXI<0b1100, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, Requires<[PreV8]> { let Inst{31-28} = 0b1111; @@ -5139,13 +5164,18 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> let Inst{7-4} = opc1; let Inst{3-0} = CRm; - let DecoderMethod = "DecodeMRRC2"; + let DecoderMethod = "DecoderForMRRC2AndMCRR2"; } def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, c_imm:$CRm), [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; -def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; + +def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, + (outs GPRnopc:$Rt, GPRnopc:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; //===----------------------------------------------------------------------===// // Move between special register and ARM core register @@ -5164,7 +5194,7 @@ def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, let Unpredictable{11-0} = 0b110100001111; } -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p), 0>, Requires<[IsARM]>; // The MRSsys instruction is the MRS instruction from the ARM ARM, @@ -5206,6 +5236,7 @@ def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), // to distinguish between them. The mask operand contains the special register // (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be // accessed in the special register. 
+let Defs = [CPSR] in def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []> { bits<5> mask; @@ -5220,6 +5251,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, let Inst{3-0} = Rn; } +let Defs = [CPSR] in def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary, "msr", "\t$mask, $imm", []> { bits<5> mask; @@ -5268,8 +5300,8 @@ let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; let usesCustomInserter = 1, Defs = [CPSR] in - def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary, - [(win__dbzchk GPR:$divisor)]>; + def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, + [(win__dbzchk tGPR:$divisor)]>; //===----------------------------------------------------------------------===// // TLS Instructions @@ -5423,6 +5455,8 @@ def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, Requires<[IsARM, UseMovt]>; +def : ARMPat<(ARMWrapper texternalsym :$dst), (MOVi32imm texternalsym :$dst)>, + Requires<[IsARM, UseMovt]>; def : ARMPat<(ARMWrapperJT tjumptable:$dst), (LEApcrelJT tjumptable:$dst)>; @@ -5568,9 +5602,9 @@ include "ARMInstrNEON.td" // // Memory barriers -def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>; -def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>; -def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>; +def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>; +def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>; +def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>; // System instructions def : MnemonicAlias<"swi", "svc">; @@ -5583,13 +5617,13 @@ def : MnemonicAlias<"stmfd", "stmdb">; def : MnemonicAlias<"stmia", "stm">; def : MnemonicAlias<"stmea", "stm">; -// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the -// shift amount is zero (i.e., unspecified). +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the +// input operands swapped when the shift amount is zero (i.e., unspecified). def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", - (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p), 0>, Requires<[IsARM, HasV6]>; def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", - (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + (PKHBT GPRnopc:$Rd, GPRnopc:$Rm, GPRnopc:$Rn, 0, pred:$p), 0>, Requires<[IsARM, HasV6]>; // PUSH/POP aliases for STM/LDM @@ -5747,23 +5781,23 @@ def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, // the instruction definitions need difference constraints pre-v6. // Use these aliases for the assembly parsing on pre-v6. 
def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm", - (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra", (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, - pred:$p, cc_out:$s)>, + pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", - (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>, Requires<[IsARM, NoV6]>; // 'it' blocks in ARM mode just validate the predicates. The IT itself @@ -5775,3 +5809,36 @@ let mayLoad = 1, mayStore =1, hasSideEffects = 1 in def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), NoItinerary, [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; + +//===---------------------------------- +// Atomic cmpxchg for -O0 +//===---------------------------------- + +// The fast register allocator used during -O0 inserts spills to cover any VRegs +// live across basic block boundaries. When this happens between an LDXR and an +// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to +// fail. + +// Unfortunately, this means we have to have an alternative (expanded +// post-regalloc) path for -O0 compilations. Fortunately this path can be +// significantly more naive than the standard expansion: we conservatively +// assume seq_cst, strong cmpxchg and omit clrex on failure. + +let Constraints = "@earlyclobber $Rd,@earlyclobber $status", + mayLoad = 1, mayStore = 1 in { +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), + (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), + NoItinerary, []>, Sched<[]>; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index 5b1f9a06442e..93a174f3678a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -15,10 +15,6 @@ // Thumb specific DAG Nodes. // -def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - def imm_sr_XFORM: SDNodeXForm<imm, [{ unsigned Imm = N->getZExtValue(); return CurDAG->getTargetConstant((Imm == 32 ? 
0 : Imm), SDLoc(N), MVT::i32); @@ -70,6 +66,14 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32); }]>; +def imm256_510 : ImmLeaf<i32, [{ + return Imm >= 256 && Imm < 511; +}]>; + +def thumb_imm256_510_addend : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() - 255, SDLoc(N), MVT::i32); +}]>; + // Scaled 4 immediate. def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } def t_imm0_1020s4 : Operand<i32> { @@ -121,26 +125,38 @@ def t_adrlabel : Operand<i32> { let ParserMatchClass = UnsignedOffset_b8s2; } -def t_bcctarget : Operand<i32> { - let EncoderMethod = "getThumbBCCTargetOpValue"; - let DecoderMethod = "DecodeThumbBCCTargetOperand"; -} -def t_cbtarget : Operand<i32> { - let EncoderMethod = "getThumbCBTargetOpValue"; - let DecoderMethod = "DecodeThumbCmpBROperand"; +def thumb_br_target : Operand<OtherVT> { + let ParserMatchClass = ThumbBranchTarget; + let EncoderMethod = "getThumbBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; } -def t_bltarget : Operand<i32> { +def thumb_bl_target : Operand<i32> { + let ParserMatchClass = ThumbBranchTarget; let EncoderMethod = "getThumbBLTargetOpValue"; let DecoderMethod = "DecodeThumbBLTargetOperand"; } -def t_blxtarget : Operand<i32> { +// Target for BLX *from* thumb mode. +def thumb_blx_target : Operand<i32> { + let ParserMatchClass = ARMBranchTarget; let EncoderMethod = "getThumbBLXTargetOpValue"; let DecoderMethod = "DecodeThumbBLXOffset"; } +def thumb_bcc_target : Operand<OtherVT> { + let ParserMatchClass = ThumbBranchTarget; + let EncoderMethod = "getThumbBCCTargetOpValue"; + let DecoderMethod = "DecodeThumbBCCTargetOperand"; +} + +def thumb_cb_target : Operand<OtherVT> { + let ParserMatchClass = ThumbBranchTarget; + let EncoderMethod = "getThumbCBTargetOpValue"; + let DecoderMethod = "DecodeThumbCmpBROperand"; +} + // t_addrmode_pc := <label> => pc + imm8 * 4 // def t_addrmode_pc : MemOperand { @@ -278,16 +294,17 @@ def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", let Inst{7-4} = imm; } -class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> { +// Note: When EmitPriority == 1, the alias will be used for printing +class tHintAlias<string Asm, dag Result, bit EmitPriority = 0> : tInstAlias<Asm, Result, EmitPriority> { let Predicates = [IsThumb, HasV6M]; } -def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110 -def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410 -def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408 -def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409 -def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157 -def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> { +def : tHintAlias<"nop$p", (tHINT 0, pred:$p), 1>; // A8.6.110 +def : tHintAlias<"yield$p", (tHINT 1, pred:$p), 1>; // A8.6.410 +def : tHintAlias<"wfe$p", (tHINT 2, pred:$p), 1>; // A8.6.408 +def : tHintAlias<"wfi$p", (tHINT 3, pred:$p), 1>; // A8.6.409 +def : tHintAlias<"sev$p", (tHINT 4, pred:$p), 1>; // A8.6.157 +def : tInstAlias<"sevl$p", (tHINT 5, pred:$p), 1> { let Predicates = [IsThumb2, HasV8]; } @@ -302,7 +319,7 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", let Inst{7-0} = val; } // default immediate for breakpoint mnemonic -def : InstAlias<"bkpt", (tBKPT 0)>, Requires<[IsThumb]>; +def : InstAlias<"bkpt", (tBKPT 0), 0>, Requires<[IsThumb]>; def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> { 
@@ -439,6 +456,14 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let Inst{2-0} = 0b000; let Unpredictable{2-0} = 0b111; } + def tBXNS : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bxns${p}\t$Rm", []>, + Requires<[IsThumb, Has8MSecExt]>, + T1Special<{1,1,0,?}>, Sched<[WriteBr]> { + bits<4> Rm; + let Inst{6-3} = Rm; + let Inst{2-0} = 0b100; + let Unpredictable{1-0} = 0b11; + } } let isReturn = 1, isTerminator = 1, isBarrier = 1 in { @@ -458,9 +483,9 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins pred:$p, t_bltarget:$func), IIC_Br, + (outs), (ins pred:$p, thumb_bl_target:$func), IIC_Br, "bl${p}\t$func", - [(ARMtcall tglobaladdr:$func)]>, + [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; @@ -472,9 +497,8 @@ let isCall = 1, // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, - "blx${p}\t$func", - [(ARMcall tglobaladdr:$func)]>, + (outs), (ins pred:$p, thumb_blx_target:$func), IIC_Br, + "blx${p}\t$func", []>, Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; @@ -488,7 +512,7 @@ let isCall = 1, // Also used for Thumb2 def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br, "blx${p}\t$func", - [(ARMtcall GPR:$func)]>, + [(ARMcall GPR:$func)]>, Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; bits<4> func; @@ -496,6 +520,17 @@ let isCall = 1, let Inst{2-0} = 0b000; } + // ARMv8-M Security Extensions + def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br, + "blxns${p}\t$func", []>, + Requires<[IsThumb, Has8MSecExt]>, + T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { + bits<4> func; + let Inst{6-3} = func; + let Inst{2-0} = 0b100; + let Unpredictable{1-0} = 0b11; + } + // ARMv4T def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func), 4, IIC_Br, @@ -517,8 +552,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // Just a pseudo for a tBL instruction. Needed to let regalloc know about // the clobber of LR. let Defs = [LR] in - def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p), - 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>, + def tBfar : tPseudoExpand<(outs), (ins thumb_bl_target:$target, pred:$p), + 4, IIC_Br, [], + (tBL pred:$p, thumb_bl_target:$target)>, Sched<[WriteBrTbl]>; def tBR_JTr : tPseudoInst<(outs), @@ -534,7 +570,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( let isBranch = 1, isTerminator = 1 in - def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br, + def tBcc : T1I<(outs), (ins thumb_bcc_target:$target, pred:$p), IIC_Br, "b${p}\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>, T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> { @@ -663,19 +699,19 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc, defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", - UnOpFrag<(load node:$Src)>>; + load>; // A8.6.64 & A8.6.61 defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", - UnOpFrag<(zextloadi8 node:$Src)>>; + zextloadi8>; // A8.6.76 & A8.6.73 defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", - UnOpFrag<(zextloadi16 node:$Src)>>; + zextloadi16>; let AddedComplexity = 10 in def tLDRSB : // A8.6.80 @@ -706,19 +742,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", - BinOpFrag<(store node:$LHS, node:$RHS)>>; + store>; // A8.6.197 & A8.6.195 defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", - BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + truncstorei8>; // A8.6.207 & A8.6.205 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", - BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + truncstorei16>; //===----------------------------------------------------------------------===// @@ -770,7 +806,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb), } // hasSideEffects def : InstAlias<"ldm${p} $Rn!, $regs", - (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>, + (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs), 0>, Requires<[IsThumb, IsThumb1Only]>; let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in @@ -1310,7 +1346,14 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb]>; + Requires<[IsThumb,IsNotWindows]>; + +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, + Defs = [ R11, LR, SP ] in +def tInt_WIN_eh_sjlj_longjmp + : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, + Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsThumb,IsWindows]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1380,16 +1423,9 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst), (tLEApcrelJT tjumptable:$dst)>; // Direct calls -def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, +def : T1Pat<(ARMcall texternalsym:$func), (tBL texternalsym:$func)>, Requires<[IsThumb]>; -def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotMClass]>; - -// Indirect calls to ARM routines -def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T]>; - // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; @@ -1415,6 +1451,24 @@ def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr 
t_addrmode_rr:$addr)>; def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>; +// post-inc loads and stores + +// post-inc LDR -> LDM r0!, {r1}. The way operands are layed out in LDMs is +// different to how ISel expects them for a post-inc load, so use a pseudo +// and expand it just after ISel. +let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in + def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb), + (ins rGPR:$Rn, pred:$p), + 4, IIC_iStore_ru, + []>; + +// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def +// multiple registers) is the same in ISel as MachineInstr, so there's no need +// for a pseudo. +def : T1Pat<(post_store rGPR:$Rt, rGPR:$Rn, 4), + (tSTMIA_UPD rGPR:$Rn, rGPR:$Rt)>; + // If it's impossible to use [r,r] address mode for sextload, select to // ldr{b|h} + sxt{b|h} instead. def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), @@ -1474,6 +1528,10 @@ def : T1Pat<(i32 thumb_immshifted:$src), def : T1Pat<(i32 imm0_255_comp:$src), (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; +def : T1Pat<(i32 imm256_510:$src), + (tADDi8 (tMOVi8 255), + (thumb_imm256_510_addend imm:$src))>; + // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. @@ -1502,7 +1560,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00 // encoding is available on ARMv6K, but we don't differentiate that finely. -def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; +def : InstAlias<"nop", (tMOVr R8, R8, 14, 0), 0>, Requires<[IsThumb, IsThumb1Only]>; // For round-trip assembly/disassembly, we have to handle a CPS instruction @@ -1524,3 +1582,8 @@ def : tInstAlias<"lsr${s}${p} $Rdm, $imm", (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; def : tInstAlias<"asr${s}${p} $Rdm, $imm", (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; + +// Pseudo instruction ldr Rt, =immediate +def tLDRConstPool + : tAsmPseudo<"ldr${p} $Rt, $immediate", + (ins tGPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index f42f4569b2f8..55e5308be40e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -270,7 +270,7 @@ def t2addrmode_so_reg : MemOperand, let EncoderMethod = "getT2AddrModeSORegOpValue"; let DecoderMethod = "DecodeT2AddrModeSOReg"; let ParserMatchClass = t2addrmode_so_reg_asmoperand; - let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); + let MIOperandInfo = (ops GPRnopc:$base, rGPR:$offsreg, i32imm:$offsimm); } // Addresses for the TBB/TBH instructions. @@ -576,8 +576,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, /// changed to modify CPSR. multiclass T2I_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0, - string wide = ""> { + SDPatternOperator opnode, bit Commutable = 0, + string wide = ""> { // shifted imm def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii, @@ -632,7 +632,7 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // the ".w" suffix to indicate that they are wide. 
multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> : + SDPatternOperator opnode, bit Commutable = 0> : T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> { // Assembler aliases w/ the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), @@ -661,7 +661,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen /// it is equivalent to the T2I_bin_irs counterpart. -multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { +multiclass T2I_rbin_irs<bits<4> opcod, string opc, SDNode opnode> { // shifted imm def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, @@ -705,7 +705,7 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis, PatFrag opnode, + InstrItinClass iis, SDNode opnode, bit Commutable = 0> { // shifted imm def ri : t2PseudoInst<(outs rGPR:$Rd), @@ -735,7 +735,7 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, /// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG /// operands are reversed. let hasPostISelHook = 1, Defs = [CPSR] in { -multiclass T2I_rbin_s_is<PatFrag opnode> { +multiclass T2I_rbin_s_is<SDNode opnode> { // shifted imm def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p), @@ -755,7 +755,7 @@ multiclass T2I_rbin_s_is<PatFrag opnode> { /// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg}) /// patterns for a binary operation that produces a value. -multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, +multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode, bit Commutable = 0> { // shifted imm // The register-immediate version is re-materializable. This is useful @@ -824,7 +824,7 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, /// for a binary operation that produces a value and use the carry /// bit. It's not predicable. let Defs = [CPSR], Uses = [CPSR] in { -multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, +multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode, bit Commutable = 0> { // shifted imm def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), @@ -864,7 +864,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. -multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> { // 5-bit imm def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, @@ -919,7 +919,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { /// a explicit result, only implicitly set CPSR. 
multiclass T2I_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode> { + SDPatternOperator opnode> { let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< @@ -1260,20 +1260,19 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, - UnOpFrag<(load node:$Src)>>; +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, load>; // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>; + GPRnopc, zextloadi16>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>; + GPRnopc, zextloadi8>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>; + GPRnopc, sextloadi16>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>; + GPRnopc, sextloadi8>; let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1414,7 +1413,7 @@ def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, - opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> { + opc, asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> { bits<4> Rt; bits<4> addr; @@ -1438,12 +1437,11 @@ def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>; // Store -defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, - BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>; defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si, - rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + rGPR, truncstorei8>; defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, - rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + rGPR, truncstorei16>; // Store doubleword let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in @@ -1586,7 +1584,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), class T2Istrrel<bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, - asm, "", pattern>, Requires<[IsThumb, HasV8]> { + asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> { bits<4> Rt; bits<4> addr; @@ -1906,7 +1904,8 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", - [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> { + [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]>, + Requires<[IsThumb, HasV8MBaseline]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1924,8 +1923,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, let DecoderMethod = "DecodeT2MOVTWInstruction"; } -def : t2InstAlias<"mov${p} $Rd, $imm", - (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>; +def : InstAlias<"mov${p} $Rd, $imm", + (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, 
pred:$p), 0>, + Requires<[IsThumb, HasV8MBaseline]>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; @@ -1936,7 +1936,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), "movt", "\t$Rd, $imm", [(set rGPR:$Rd, (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>, - Sched<[WriteALU]> { + Sched<[WriteALU]>, + Requires<[IsThumb, HasV8MBaseline]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1956,7 +1957,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, - Sched<[WriteALU]>; + Sched<[WriteALU]>, Requires<[IsThumb, HasV8MBaseline]>; } // Constraints def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; @@ -1997,7 +1998,7 @@ def t2UXTB : T2I_ext_rrot<0b101, "uxtb", def t2UXTH : T2I_ext_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", - UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. // The transformation should probably be done as a combiner action @@ -2029,10 +2030,8 @@ def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), // Arithmetic Instructions. // -defm t2ADD : T2I_bin_ii12rs<0b000, "add", - BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; -defm t2SUB : T2I_bin_ii12rs<0b101, "sub", - BinOpFrag<(sub node:$LHS, node:$RHS)>>; +defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>; +defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>; // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. // @@ -2044,25 +2043,20 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", // FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen // support for an optional CPSR definition that corresponds to the DAG // node's second value. We can then eliminate the implicit def of CPSR. -defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; -defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>; +defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>; let hasPostISelHook = 1 in { -defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; -defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; +defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>; +defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>; } // RSB -defm t2RSB : T2I_rbin_irs <0b1110, "rsb", - BinOpFrag<(sub node:$LHS, node:$RHS)>>; +defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. -defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +defm t2RSBS : T2I_rbin_s_is <ARMsubc>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. 
// The assume-no-carry-in form uses the negation of the input since add/sub @@ -2293,19 +2287,17 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; +def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), + (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. // -defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, - BinOpFrag<(shl node:$LHS, node:$RHS)>>; -defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, - BinOpFrag<(srl node:$LHS, node:$RHS)>>; -defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, - BinOpFrag<(sra node:$LHS, node:$RHS)>>; -defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, - BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>; +defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>; +defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>; +defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2362,14 +2354,11 @@ def t2MOVsra_flag : T2TwoRegShiftImm< // defm t2AND : T2I_bin_w_irs<0b0000, "and", - IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsi, and, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", - IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsi, or, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", - IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; + IIC_iBITi, IIC_iBITr, IIC_iBITsi, xor, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, @@ -2516,7 +2505,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, - UnOpFrag<(not node:$Src)>, 1, 1, 1>; + not, 1, 1, 1>; let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), @@ -2606,8 +2595,9 @@ def t2UMLAL : T2MlaLong<0b110, 0b0000, def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsThumb2, HasDSP]>; } // hasSideEffects @@ -2677,7 +2667,7 @@ def t2SMMLSR:T2FourReg< let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -multiclass T2I_smul<string opc, PatFrag opnode> { +multiclass T2I_smul<string opc, SDNode opnode> { def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), @@ -2756,7 +2746,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { } -multiclass T2I_smla<string opc, PatFrag opnode> { +multiclass T2I_smla<string opc, SDNode opnode> { def BB : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", @@ -2835,8 +2825,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { } } -defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm t2SMUL : T2I_smul<"smul", mul>; +defm t2SMLA : T2I_smla<"smla", mul>; // Halfword multiple accumulate long: 
SMLAL<x><y> def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), @@ -2923,7 +2913,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb2]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -2934,7 +2924,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb2]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -3080,8 +3070,7 @@ def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>; // Comparison Instructions... // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", - IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, ARMcmp>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; @@ -3288,15 +3277,18 @@ let mayLoad = 1 in { def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", - [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; + [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]>; def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", - [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; + [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", - [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> { + [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3320,17 +3312,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3345,7 +3337,8 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexd", "\t$Rt, $Rt2, $addr", "", - [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> { + [], {?, ?, ?, ?}>, Requires<[IsThumb, + HasAcquireRelease, HasV7Clrex, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -3359,20 +3352,23 @@ def t2STREXB : 
T2I_strex<0b0100, (outs rGPR:$Rd), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, - (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>; + (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, - (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>; + (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, - (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]> { + (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, + Requires<[IsThumb, HasV8MBaseline]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3399,7 +3395,8 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), "stlexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]>; + Requires<[IsThumb, HasAcquireRelease, + HasV7Clrex]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3407,7 +3404,8 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), "stlexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]>; + Requires<[IsThumb, HasAcquireRelease, + HasV7Clrex]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3415,7 +3413,7 @@ def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, "stlex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3431,14 +3429,15 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], - {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> { + {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease, + HasV7Clrex, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } } def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, - Requires<[IsThumb2, HasV7]> { + Requires<[IsThumb, HasV7Clrex]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; let Inst{13} = 0; @@ -3449,22 +3448,30 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, } def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff), - (t2LDREXB addr_offset_none:$addr)>; + (t2LDREXB addr_offset_none:$addr)>, + Requires<[IsThumb, HasV8MBaseline]>; def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), - (t2LDREXH addr_offset_none:$addr)>; + (t2LDREXH addr_offset_none:$addr)>, + Requires<[IsThumb, HasV8MBaseline]>; def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), - (t2STREXB GPR:$Rt, addr_offset_none:$addr)>; + (t2STREXB GPR:$Rt, addr_offset_none:$addr)>, + Requires<[IsThumb, HasV8MBaseline]>; def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), - (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; + (t2STREXH GPR:$Rt, addr_offset_none:$addr)>, + Requires<[IsThumb, HasV8MBaseline]>; def : T2Pat<(and (ldaex_1 addr_offset_none:$addr), 0xff), - (t2LDAEXB addr_offset_none:$addr)>; + (t2LDAEXB addr_offset_none:$addr)>, + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; def : T2Pat<(and (ldaex_2 
addr_offset_none:$addr), 0xffff), - (t2LDAEXH addr_offset_none:$addr)>; + (t2LDAEXH addr_offset_none:$addr)>, + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; def : T2Pat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), - (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>; + (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>, + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), - (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>; + (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>, + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics @@ -3517,9 +3524,10 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in -def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, +def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br, "b", ".w\t$target", - [(br bb:$target)]>, Sched<[WriteBr]> { + [(br bb:$target)]>, Sched<[WriteBr]>, + Requires<[IsThumb, HasV8MBaseline]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 1; @@ -3609,9 +3617,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), - (ins uncondbrtarget:$dst, pred:$p), + (ins thumb_br_target:$dst, pred:$p), 4, IIC_Br, [], - (t2B uncondbrtarget:$dst, pred:$p)>, + (t2B thumb_br_target:$dst, pred:$p)>, Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>; } @@ -3647,10 +3655,10 @@ def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>, // Compare and branch on zero / non-zero let isBranch = 1, isTerminator = 1 in { - def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + def tCBZ : T1I<(outs), (ins tGPR:$Rn, thumb_cb_target:$target), IIC_Br, "cbz\t$Rn, $target", []>, T1Misc<{0,0,?,1,?,?,?}>, - Requires<[IsThumb2]>, Sched<[WriteBr]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3659,10 +3667,10 @@ let isBranch = 1, isTerminator = 1 in { let Inst{2-0} = Rn; } - def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + def tCBNZ : T1I<(outs), (ins tGPR:$Rn, thumb_cb_target:$target), IIC_Br, "cbnz\t$Rn, $target", []>, T1Misc<{1,0,?,1,?,?,?}>, - Requires<[IsThumb2]>, Sched<[WriteBr]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3715,15 +3723,21 @@ def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", let Inst{7-0} = imm; } -def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>; -def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; -def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; -def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; -def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>; -def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>; -def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> { +def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p), 0>; +def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p), 1>; +def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p), 1>; +def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p), 1>; +def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p), 1>; +def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p), 1>; +def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p), 1> { let Predicates = [IsThumb2, HasV8]; } +def : t2InstAlias<"esb$p.w", (t2HINT 16, pred:$p), 1> 
{ + let Predicates = [IsThumb2, HasRAS]; +} +def : t2InstAlias<"esb$p", (t2HINT 16, pred:$p), 0> { + let Predicates = [IsThumb2, HasRAS]; +} def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", [(int_arm_dbg imm0_15:$opt)]> { @@ -3848,7 +3862,7 @@ def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>; // ERET - Return from exception in Hypervisor mode. // B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that // includes virtualization extensions. -def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>, +def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p), 1>, Requires<[IsThumb2, HasVirtualization]>; //===----------------------------------------------------------------------===// @@ -3871,7 +3885,7 @@ let isReMaterializable = 1 in { def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), IIC_iMOVix2addpc, [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsThumb2, UseMovt]>; + Requires<[IsThumb, HasV8MBaseline, UseMovt]>; } @@ -3883,12 +3897,13 @@ def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst), Requires<[IsThumb2, UseMovt]>; // ConstantPool, GlobalAddress, and JumpTable -def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; -def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, - Requires<[IsThumb2, UseMovt]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; +def : T2Pat<(ARMWrapper texternalsym :$dst), (t2MOVi32imm texternalsym :$dst)>, + Requires<[IsThumb, HasV8MBaseline, UseMovt]>; +def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, + Requires<[IsThumb, HasV8MBaseline, UseMovt]>; -def : T2Pat<(ARMWrapperJT tjumptable:$dst), - (t2LEApcrelJT tjumptable:$dst)>; +def : T2Pat<(ARMWrapperJT tjumptable:$dst), (t2LEApcrelJT tjumptable:$dst)>; // Pseudo instruction that combines ldr from constpool and add pc. 
This should // be expanded into two instructions late to allow if-conversion and @@ -3910,16 +3925,16 @@ def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), //===----------------------------------------------------------------------===// // Coprocessor load/store -- for disassembly only // -class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm> - : T2I<oops, iops, NoItinerary, opc, asm, []> { +class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag> pattern> + : T2I<oops, iops, NoItinerary, opc, asm, pattern> { let Inst{31-28} = op31_28; let Inst{27-25} = 0b110; } -multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { +multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> pattern> { def _OFFSET : T2CI<op31_28, (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), - asm, "\t$cop, $CRd, $addr"> { + asm, "\t$cop, $CRd, $addr", pattern> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -3936,7 +3951,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { } def _PRE : T2CI<op31_28, (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), - asm, "\t$cop, $CRd, $addr!"> { + asm, "\t$cop, $CRd, $addr!", []> { bits<13> addr; bits<4> cop; bits<4> CRd; @@ -3954,7 +3969,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { def _POST: T2CI<op31_28, (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, postidx_imm8s4:$offset), - asm, "\t$cop, $CRd, $addr, $offset"> { + asm, "\t$cop, $CRd, $addr, $offset", []> { bits<9> offset; bits<4> addr; bits<4> cop; @@ -3973,7 +3988,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { def _OPTION : T2CI<op31_28, (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, coproc_option_imm:$option), - asm, "\t$cop, $CRd, $addr, $option"> { + asm, "\t$cop, $CRd, $addr, $option", []> { bits<8> option; bits<4> addr; bits<4> cop; @@ -3991,14 +4006,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { } } -defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; -defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; -defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; -defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>; +defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; + +defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; //===----------------------------------------------------------------------===// 
@@ -4070,6 +4086,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary, // same and the assembly parser has no way to distinguish between them. The mask // operand contains the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. +let Defs = [CPSR] in def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []>, Requires<[IsThumb2,IsNotMClass]> { @@ -4105,6 +4122,7 @@ def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn), // M class MSR. // // Move from ARM core register to Special Register +let Defs = [CPSR] in def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), NoItinerary, "msr", "\t$SYSm, $Rn", []>, Requires<[IsThumb,IsMClass]> { @@ -4314,6 +4332,37 @@ def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>, } //===----------------------------------------------------------------------===// +// ARMv8-M Security Extensions instructions +// + +let hasSideEffects = 1 in +def t2SG : T2I<(outs), (ins), NoItinerary, "sg", "", []>, + Requires<[Has8MSecExt]> { + let Inst = 0xe97fe97f; +} + +class T2TT<bits<2> at, string asm, list<dag> pattern> + : T2I<(outs rGPR:$Rt), (ins GPRnopc:$Rn), NoItinerary, asm, "\t$Rt, $Rn", + pattern> { + bits<4> Rn; + bits<4> Rt; + + let Inst{31-20} = 0b111010000100; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rt; + let Inst{7-6} = at; + let Inst{5-0} = 0b000000; + + let Unpredictable{5-0} = 0b111111; +} + +def t2TT : T2TT<0b00, "tt", []>, Requires<[IsThumb,Has8MSecExt]>; +def t2TTT : T2TT<0b01, "ttt", []>, Requires<[IsThumb,Has8MSecExt]>; +def t2TTA : T2TT<0b10, "tta", []>, Requires<[IsThumb,Has8MSecExt]>; +def t2TTAT : T2TT<0b11, "ttat", []>, Requires<[IsThumb,Has8MSecExt]>; + +//===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -4488,9 +4537,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; // Memory barriers -def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>; -def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>; -def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>; +def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>; +def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>; +def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. @@ -4535,13 +4584,13 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; -// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the -// shift amount is zero (i.e., unspecified). +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the +// input operands swapped when the shift amount is zero (i.e., unspecified). 
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", - (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", - (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; // PUSH/POP aliases for STM/LDM @@ -4620,16 +4669,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr", // Extend instruction optional rotate operand. def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm", - (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm", - (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", - (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"sxtb16${p} $Rd, $Rm", - (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>, + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"sxtb${p} $Rd, $Rm", @@ -4642,16 +4691,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm", (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm", - (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm", - (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", - (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : InstAlias<"uxtb16${p} $Rd, $Rm", - (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>, + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"uxtb${p} $Rd, $Rm", @@ -4667,7 +4716,7 @@ def : t2InstAlias<"uxth${p}.w $Rd, $Rm", def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"uxtb16${p} $Rd, $Rm$rot", - (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>, + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; @@ -4675,7 +4724,7 @@ def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"sxtb16${p} $Rd, $Rm$rot", - (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>, + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; @@ -4764,9 +4813,14 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; +// Pseudo instruction ldr Rt, =immediate +def t2LDRConstPool + : t2AsmPseudo<"ldr${p} $Rt, $immediate", + (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + // PLD/PLDW/PLI with alternate literal form. 
def : t2InstAlias<"pld${p} $addr", (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>; def : InstAlias<"pli${p} $addr", - (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>, + (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>, Requires<[IsThumb2,HasV7]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index 63e7940bb14e..e29d265ae3d1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -30,6 +30,18 @@ def FPImmOperand : AsmOperandClass { let ParserMethod = "parseFPImm"; } +def vfp_f16imm : Operand<f16>, + PatLeaf<(f16 fpimm), [{ + return ARM_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} + def vfp_f32imm : Operand<f32>, PatLeaf<(f32 fpimm), [{ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1; @@ -98,6 +110,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), let D = VFPNeonDomain; } +def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr), + IIC_fpLoad16, "vldr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), @@ -112,6 +129,11 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), let D = VFPNeonDomain; } +def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr), + IIC_fpStore16, "vstr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -200,6 +222,37 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; + +//===----------------------------------------------------------------------===// +// Lazy load / store multiple Instructions +// +let mayLoad = 1 in +def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, + IIC_fpLoad_m, "vlldm${p}\t$Rn", "", []>, + Requires<[HasV8MMainline, Has8MSecExt]> { + let Inst{24-23} = 0b00; + let Inst{22} = 0; + let Inst{21} = 1; + let Inst{20} = 1; + let Inst{15-12} = 0; + let Inst{7-0} = 0; + let mayLoad = 1; +} + +let mayStore = 1 in +def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, + IIC_fpStore_m, "vlstm${p}\t$Rn", "", []>, + Requires<[HasV8MMainline, Has8MSecExt]> { + let Inst{24-23} = 0b00; + let Inst{22} = 0; + let Inst{21} = 1; + let Inst{20} = 0; + let Inst{15-12} = 0; + let Inst{7-0} = 0; + let mayStore = 1; +} + + // FLDM/FSTM - Load / Store multiple single / double precision registers for // pre-ARMv6 cores. // These instructions are deprecated! 
@@ -221,13 +274,13 @@ def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; def : VFP2MnemonicAlias<"fstmead", "vstmia">; def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; -def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, +def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>, Requires<[HasVFP2]>; -def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, +def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>, Requires<[HasVFP2]>; -def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, +def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>, Requires<[HasVFP2]>; -def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, +def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>, Requires<[HasVFP2]>; defm : VFPDTAnyInstAlias<"vpush${p}", "$r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>; @@ -295,6 +348,12 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VADDH : AHbI<0b11100, 0b11, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -311,6 +370,12 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VSUBH : AHbI<0b11100, 0b11, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -323,6 +388,12 @@ def VDIVS : ASbI<0b11101, 0b00, 0, 0, IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VDIVH : AHbI<0b11101, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -339,6 +410,12 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VMULH : AHbI<0b11100, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", + []>; + def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", @@ -353,9 +430,20 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +def VNMULH : AHbI<0b11100, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", + []>; + multiclass vsel_inst<string op, bits<2> opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", Uses = [CPSR], AddedComplexity = 4 in { + def H : AHbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), @@ -378,6 +466,12 @@ defm VSELVS : vsel_inst<"vs", 0b01, 6>; multiclass vmaxmin_inst<string op, bit opc, SDNode SD> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def H : AHbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, 
!strconcat(op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11101, 0b00, opc, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), @@ -418,6 +512,12 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", + []>; + + // FIXME: Verify encoding after integrated assembler is working. def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), @@ -432,6 +532,11 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } + +def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", + []>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -452,6 +557,11 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } +def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm", + []>; + let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), @@ -473,6 +583,14 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} + // FIXME: Verify encoding after integrated assembler is working. def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), @@ -493,6 +611,14 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } + +def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} } // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, @@ -627,6 +753,22 @@ def : Pat<(f64 (f16_to_fp GPR:$a)), multiclass vcvt_inst<string opc, bits<2> rm, SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + + def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), @@ -715,7 +857,21 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm", + []>; + multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { + def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm", + []>, + Requires<[HasFullFP16]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", @@ -733,11 +889,14 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { let Inst{16} = op; } + def : InstAlias<!strconcat("vrint", opc, "$p.f16.f16\t$Sd, $Sm"), + (!cast<Instruction>(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p), 0>, + Requires<[HasFullFP16]>; def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"), - (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, + (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p), 0>, Requires<[HasFPARMv8]>; def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"), - (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>, + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p), 0>, Requires<[HasFPARMv8,HasDPVFP]>; } @@ -748,6 +907,13 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; multiclass vrint_inst_anpm<string opc, bits<2> rm, SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), @@ -765,10 +931,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm, } def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"), - (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>, + (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm), 0>, Requires<[HasFPARMv8]>; def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"), - (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>, + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm), 0>, Requires<[HasFPARMv8,HasDPVFP]>; } @@ -787,6 +953,11 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", 
".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; +def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm", + []>; + let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -795,6 +966,18 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; + +let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { +def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; + +def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; +} // PostEncoderMethod } // hasSideEffects //===----------------------------------------------------------------------===// @@ -966,6 +1149,44 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, let DecoderMethod = "DecodeVMOVSRR"; } +// Move H->R, clearing top 16 bits +def VMOVRH : AVConv2I<0b11100001, 0b1001, + (outs GPR:$Rt), (ins SPR:$Sn), + IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<4> Rt; + bits<5> Sn; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + +// Move R->H, clearing top 16 bits +def VMOVHR : AVConv4I<0b11100000, 0b1001, + (outs SPR:$Sn), (ins GPR:$Rt), + IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<5> Sn; + bits<4> Rt; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR // FMRRS: SPR -> GPR @@ -1011,6 +1232,25 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", @@ -1043,6 +1283,13 @@ def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOS (VLDRS addrmode5:$a))>; +def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", + []> { + let Inst{7} = 1; // s32 +} + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", @@ -1075,6 +1322,13 @@ def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOS (VLDRS addrmode5:$a))>; +def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", + []> { + let Inst{7} = 0; // u32 +} + // FP -> Int: class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1113,6 +1367,25 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + // Always set Z bit in the instruction, i.e. "round towards zero" variants. def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), @@ -1147,6 +1420,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; +def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", @@ -1180,6 +1460,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; +def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. 
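The VSITOH/VUITOH and VTOSIZH/VTOUIZH definitions above complete the integer-to-half and half-to-integer conversion set; like the other .f16 instructions added in this file they are only usable when the full FP16 extension is present (Requires<[HasFullFP16]>). As a rough sketch, not taken from the patch, a selector choosing between these forms might look like the following; the hasFullFP16() accessor name is an assumption, only the HasFullFP16 feature itself appears in this diff:

    // Sketch only: pick an s32 -> floating-point conversion opcode.
    static unsigned pickSIntToFPOpcode(const ARMSubtarget &ST, MVT DstVT) {
      if (DstVT == MVT::f16 && ST.hasFullFP16())
        return ARM::VSITOH;   // direct s32 -> f16 form defined above
      if (DstVT == MVT::f32)
        return ARM::VSITOS;
      return ARM::VSITOD;     // s32 -> f64
    }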
@@ -1197,6 +1484,13 @@ def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let Inst{7} = 0; // Z bit } +def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} + def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", @@ -1210,6 +1504,13 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { let Inst{7} = 0; // Z bit } + +def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} } // Convert between floating-point and fixed-point @@ -1249,6 +1550,26 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, let Predicates = [HasVFP2, HasDPVFP]; } +def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { @@ -1299,6 +1620,26 @@ def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, // Fixed-Point to FP: +def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { @@ -1373,6 +1714,13 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, let D = VFPNeonA8Domain; } +def VMLAH : AHbI<0b11100, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1400,6 
+1748,13 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, let D = VFPNeonA8Domain; } +def VMLSH : AHbI<0b11100, 0b00, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1427,6 +1782,13 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, let D = VFPNeonA8Domain; } +def VNMLAH : AHbI<0b11100, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1453,6 +1815,13 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, let D = VFPNeonA8Domain; } +def VNMLSH : AHbI<0b11100, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1482,6 +1851,13 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, // VFP pipelines. } +def VFMAH : AHbI<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1517,6 +1893,13 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, // VFP pipelines. } +def VFMSH : AHbI<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1559,6 +1942,13 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, // VFP pipelines. } +def VFNMAH : AHbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1600,6 +1990,13 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, // VFP pipelines. 
} +def VFNMSH : AHbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1780,6 +2177,23 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), let Inst{7-4} = 0b0000; let Inst{3-0} = imm{3-0}; } + +def FCONSTH : VFPAI<(outs SPR:$Sd), (ins vfp_f16imm:$imm), + VFPMiscFrm, IIC_fpUNA16, + "vmov", ".f16\t$Sd, $imm", + []>, Requires<[HasFullFP16]> { + bits<5> Sd; + bits<8> imm; + + let Inst{27-23} = 0b11101; + let Inst{22} = Sd{0}; + let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Sd{4-1}; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; +} } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 6e7e47b8706a..62d57f3f4986 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -60,9 +60,14 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); -namespace llvm { -void initializeARMLoadStoreOptPass(PassRegistry &); -} +/// This switch disables formation of double/multi instructions that could +/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP +/// disabled. This can be used to create libraries that are robust even when +/// users provoke undefined behaviour by supplying misaligned pointers. +/// \see mayCombineMisaligned() +static cl::opt<bool> +AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, + cl::init(false), cl::desc("Be more conservative in ARM load/store opt")); #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" @@ -71,9 +76,7 @@ namespace { /// form ldm / stm instructions. struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(ID) { - initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry()); - } + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const MachineFunction *MF; const TargetInstrInfo *TII; @@ -90,6 +93,11 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; } @@ -101,8 +109,8 @@ namespace { MachineInstr *MI; int Offset; ///< Load/Store offset. unsigned Position; ///< Position as counted from end of basic block. 
- MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) - : MI(MI), Offset(Offset), Position(Position) {} + MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position) + : MI(&MI), Offset(Offset), Position(Position) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; @@ -131,17 +139,19 @@ namespace { MachineBasicBlock::const_iterator Before); unsigned findFreeReg(const TargetRegisterClass &RegClass); void UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned Base, unsigned WordOffset, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg); - MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); - MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; + MachineInstr *CreateLoadStoreMulti( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs); + MachineInstr *CreateLoadStoreDouble( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) const; void FormCandidates(const MemOpQueue &MemOps); MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -156,10 +166,11 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false) +INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, + false) -static bool definesCPSR(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) { +static bool definesCPSR(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) { if (!MO.isReg()) continue; if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) @@ -171,11 +182,11 @@ static bool definesCPSR(const MachineInstr *MI) { return false; } -static int getMemoryOpOffset(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); +static int getMemoryOpOffset(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + unsigned NumOperands = MI.getDesc().getNumOperands(); + unsigned OffField = MI.getOperand(NumOperands - 3).getImm(); if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || @@ -436,12 +447,12 @@ static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { /// Update future uses of the base register with the offset introduced /// due to writeback. This function only works on Thumb1. 
-void -ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned Base, - unsigned WordOffset, - ARMCC::CondCodes Pred, unsigned PredReg) { +void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, + unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). @@ -475,7 +486,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && - !definesCPSR(MBBI)) { + !definesCPSR(*MBBI)) { // SUBS/ADDS using this register, with a dead def of the CPSR. // Merge it with the update; if the merged offset is too large, // insert a new sub instead. @@ -499,7 +510,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) { // Since SUBS sets the condition flags, we can't place the base reset // after an instruction that has a live CPSR def. // The base register might also contain an argument for a function call. @@ -552,7 +563,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, // Initialize if we never queried in this block. if (!LiveRegsValid) { LiveRegs.init(TRI); - LiveRegs.addLiveOuts(&MBB, true); + LiveRegs.addLiveOuts(MBB); LiveRegPos = MBB.end(); LiveRegsValid = true; } @@ -574,10 +585,11 @@ static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, /// Create and insert a LDM or STM with Base as base register and registers in /// Regs as the register operands that would be loaded / stored. It returns /// true if the transformation is done. -MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) { unsigned NumRegs = Regs.size(); assert(NumRegs > 1); @@ -770,10 +782,11 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, return MIB.getInstr(); } -MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) const { bool IsLoad = isi32Load(Opcode); assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); unsigned LoadStoreOpcode = IsLoad ? 
ARM::t2LDRDi8 : ARM::t2STRDi8; @@ -836,11 +849,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; iterator InsertBefore = std::next(iterator(LatestMI)); MachineBasicBlock &MBB = *LatestMI->getParent(); - unsigned Offset = getMemoryOpOffset(First); + unsigned Offset = getMemoryOpOffset(*First); unsigned Base = getLoadStoreBaseOp(*First).getReg(); bool BaseKill = LatestMI->killsRegister(Base); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); DebugLoc DL = First->getDebugLoc(); MachineInstr *Merged = nullptr; if (Cand.CanMergeToLSDouble) @@ -916,6 +929,24 @@ static bool isValidLSDoubleOffset(int Offset) { return (Value % 4) == 0 && Value < 1024; } +/// Return true for loads/stores that can be combined to a double/multi +/// operation without increasing the requirements for alignment. +static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, + const MachineInstr &MI) { + // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no + // difference. + unsigned Opcode = MI.getOpcode(); + if (!isi32Load(Opcode) && !isi32Store(Opcode)) + return true; + + // Stack pointer alignment is out of the programmers control so we can trust + // SP-relative loads/stores. + if (getLoadStoreBaseOp(MI).getReg() == ARM::SP && + STI.getFrameLowering()->getTransientStackAlignment() >= 4) + return true; + return false; +} + /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { const MachineInstr *FirstMI = MemOps[0].MI; @@ -946,7 +977,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { bool CanMergeToLSMulti = true; // On swift vldm/vstm starting with an odd register number as that needs // more uops than single vldrs. - if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) + if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1) CanMergeToLSMulti = false; // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it @@ -954,6 +985,10 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { if (PReg == ARM::SP || PReg == ARM::PC) CanMergeToLSMulti = CanMergeToLSDouble = false; + // Should we be conservative? + if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI)) + CanMergeToLSMulti = CanMergeToLSDouble = false; + // Merge following instructions where possible. 
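The new -arm-assume-misaligned-load-store option, together with the mayCombineMisaligned() check above, keeps integer word loads/stores separate (except SP-relative ones) instead of merging them into LDRD/LDM, because the paired and multiple forms still require word alignment even when unaligned LDR/STR is permitted. A hedged source-level illustration of the code it protects; the function is invented for the example:

    // With -arm-assume-misaligned-load-store, these two i32 loads stay as two
    // LDRs; merging them into an LDRD/LDM could trap if a caller hands in a
    // misaligned pointer (undefined behaviour that a plain LDR tolerates when
    // alignment checking is disabled).
    long long sum_pair(const int *p) {
      return (long long)p[0] + (long long)p[1];
    }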
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { int NewOffset = MemOps[I].Offset; @@ -1102,11 +1137,11 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, unsigned MIPredReg; if (MI.getOperand(0).getReg() != Reg || MI.getOperand(1).getReg() != Reg || - getInstrPredicate(&MI, MIPredReg) != Pred || + getInstrPredicate(MI, MIPredReg) != Pred || MIPredReg != PredReg) return 0; - if (CheckCPSRDef && definesCPSR(&MI)) + if (CheckCPSRDef && definesCPSR(MI)) return 0; return MI.getOperand(2).getImm() * Scale; } @@ -1169,7 +1204,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { unsigned Base = BaseOP.getReg(); bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); @@ -1193,10 +1228,30 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && - ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) - return false; + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { + + // We couldn't find an inc/dec to merge. But if the base is dead, we + // can still change to a writeback form as that will save us 2 bytes + // of code size. It can create WAW hazards though, so only do it if + // we're minimizing code size. + if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) + return false; + + bool HighRegsUsed = false; + for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).getReg() >= ARM::R8) { + HighRegsUsed = true; + break; + } + + if (!HighRegsUsed) + MergeInstr = MBB.end(); + else + return false; + } } - MBB.erase(MergeInstr); + if (MergeInstr != MBB.end()) + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) @@ -1291,7 +1346,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); int Bytes = getLSMultipleTransferSize(MI); MachineBasicBlock &MBB = *MI->getParent(); MachineBasicBlock::iterator MBBI(MI); @@ -1388,7 +1443,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { return false; unsigned PredReg; - ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); MachineBasicBlock::iterator MBBI(MI); MachineBasicBlock &MBB = *MI.getParent(); int Offset; @@ -1487,14 +1542,13 @@ static bool isMemoryOp(const MachineInstr &MI) { } static void InsertLDR_STR(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - int Offset, bool isDef, - DebugLoc DL, unsigned NewOpc, + MachineBasicBlock::iterator &MBBI, int Offset, + bool isDef, const DebugLoc &DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, - bool OffKill, bool OffUndef, - ARMCC::CondCodes Pred, unsigned PredReg, - const TargetInstrInfo *TII, bool isT2) { + bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, + unsigned PredReg, const TargetInstrInfo *TII, + bool isT2) { if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) @@ -1547,9 
+1601,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool BaseUndef = BaseOp.isUndef(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); - int OffImm = getMemoryOpOffset(MI); + int OffImm = getMemoryOpOffset(*MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1655,14 +1709,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned Reg = MO.getReg(); unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); - int Offset = getMemoryOpOffset(MBBI); + ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); + int Offset = getMemoryOpOffset(*MBBI); if (CurrBase == 0) { // Start of a new chain. CurrBase = Base; CurrOpc = Opcode; CurrPred = Pred; - MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); continue; } // Note: No need to match PredReg in the next if. @@ -1690,7 +1744,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (!Overlap) { // Check offset and sort memory operation into the current chain. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); continue; } else { MemOpQueue::iterator MI, ME; @@ -1706,7 +1760,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } if (MI != MemOps.end()) { - MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position)); continue; } } @@ -1723,7 +1777,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MBBI->getOpcode() == ARM::t2STRDi8) { // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions // remember them because we may still be able to merge add/sub into them. - MergeBaseCandidates.push_back(MBBI); + MergeBaseCandidates.push_back(&*MBBI); } @@ -1805,20 +1859,20 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { // Ignore any DBG_VALUE instructions. while (PrevI->isDebugValue() && PrevI != MBB.begin()) --PrevI; - MachineInstr *PrevMI = PrevI; - unsigned Opcode = PrevMI->getOpcode(); + MachineInstr &PrevMI = *PrevI; + unsigned Opcode = PrevMI.getOpcode(); if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { - MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); + MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1); if (MO.getReg() != ARM::LR) return false; unsigned NewOpc = (isThumb2 ? 
ARM::t2LDMIA_RET : ARM::LDMIA_RET); assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); - PrevMI->setDesc(TII->get(NewOpc)); + PrevMI.setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); + PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); return true; } @@ -1840,8 +1894,8 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) - .addReg(Use.getReg(), RegState::Kill)) - .copyImplicitOps(&*MBBI); + .addReg(Use.getReg(), RegState::Kill)) + .copyImplicitOps(*MBBI); MBB.erase(MBBI); MBB.erase(Prev); return true; @@ -1851,6 +1905,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(*Fn.getFunction())) + return false; + MF = &Fn; STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TL = STI->getTargetLowering(); @@ -1877,10 +1934,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -namespace llvm { -void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); -} - #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ "ARM pre- register allocation load / store optimization pass" @@ -1889,9 +1942,7 @@ namespace { /// locations close to make it more likely they will be combined later. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { - initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); - } + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} const DataLayout *TD; const TargetInstrInfo *TII; @@ -1922,10 +1973,13 @@ namespace { char ARMPreAllocLoadStoreOpt::ID = 0; } -INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", +INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt", ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) + return false; + TD = &Fn.getDataLayout(); STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TII = STI->getInstrInfo(); @@ -2034,7 +2088,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; // Then make sure the immediate offset fits. - int OffImm = getMemoryOpOffset(Op0); + int OffImm = getMemoryOpOffset(*Op0); if (isT2) { int Limit = (1 << 8) * Scale; if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) @@ -2056,7 +2110,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (FirstReg == SecondReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(*Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -2070,11 +2124,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, // Sort by offset (in reverse order). 
std::sort(Ops.begin(), Ops.end(), [](const MachineInstr *LHS, const MachineInstr *RHS) { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - }); + int LOffset = getMemoryOpOffset(*LHS); + int ROffset = getMemoryOpOffset(*RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + }); // The loads / stores of the same base are in order. Scan them from first to // last and check for the following: @@ -2106,7 +2160,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (LastOpcode && LSMOpcode != LastOpcode) break; - int Offset = getMemoryOpOffset(Op); + int Offset = getMemoryOpOffset(*Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) @@ -2141,8 +2195,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() - && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) + while (InsertPos != MBB->end() && + (MemOps.count(&*InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -2237,25 +2291,25 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator E = MBB->end(); while (MBBI != E) { for (; MBBI != E; ++MBBI) { - MachineInstr *MI = MBBI; - if (MI->isCall() || MI->isTerminator()) { + MachineInstr &MI = *MBBI; + if (MI.isCall() || MI.isTerminator()) { // Stop at barriers. ++MBBI; break; } - if (!MI->isDebugValue()) - MI2LocMap[MI] = ++Loc; + if (!MI.isDebugValue()) + MI2LocMap[&MI] = ++Loc; - if (!isMemoryOp(*MI)) + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; - int Opc = MI->getOpcode(); + int Opc = MI.getOpcode(); bool isLd = isLoadSingle(Opc); - unsigned Base = MI->getOperand(1).getReg(); + unsigned Base = MI.getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); bool StopHere = false; @@ -2264,15 +2318,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { Base2LdsMap.find(Base); if (BI != Base2LdsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { - if (Offset == getMemoryOpOffset(BI->second[i])) { + if (Offset == getMemoryOpOffset(*BI->second[i])) { StopHere = true; break; } } if (!StopHere) - BI->second.push_back(MI); + BI->second.push_back(&MI); } else { - Base2LdsMap[Base].push_back(MI); + Base2LdsMap[Base].push_back(&MI); LdBases.push_back(Base); } } else { @@ -2280,15 +2334,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { Base2StsMap.find(Base); if (BI != Base2StsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { - if (Offset == getMemoryOpOffset(BI->second[i])) { + if (Offset == getMemoryOpOffset(*BI->second[i])) { StopHere = true; break; } } if (!StopHere) - BI->second.push_back(MI); + BI->second.push_back(&MI); } else { - Base2StsMap[Base].push_back(MI); + Base2StsMap[Base].push_back(&MI); StBases.push_back(Base); } } @@ -2335,4 +2389,3 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { return new ARMPreAllocLoadStoreOpt(); return new ARMLoadStoreOpt(); } - diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 
a2aca2d1a69e..7429acdb09ad 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -26,33 +26,22 @@ using namespace llvm; MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol) { - const MCExpr *Expr; - unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK; - switch (Option) { - default: { - Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, - OutContext); - switch (Option) { - default: llvm_unreachable("Unknown target flag on symbol operand"); - case ARMII::MO_NO_FLAG: - break; - case ARMII::MO_LO16: - Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, - OutContext); - Expr = ARMMCExpr::createLower16(Expr, OutContext); - break; - case ARMII::MO_HI16: - Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, - OutContext); - Expr = ARMMCExpr::createUpper16(Expr, OutContext); - break; - } + const MCExpr *Expr = + MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) { + default: + llvm_unreachable("Unknown target flag on symbol operand"); + case ARMII::MO_NO_FLAG: break; - } - - case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT, - OutContext); + case ARMII::MO_LO16: + Expr = + MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + Expr = ARMMCExpr::createLower16(Expr, OutContext); + break; + case ARMII::MO_HI16: + Expr = + MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; } @@ -89,7 +78,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, break; } case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, + MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); break; case MachineOperand::MO_JumpTableIndex: diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 71ad7a4a732a..b6dee9ff8385 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -21,4 +21,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), - IsSplitCSR(false) {} + ArgumentStackSize(0), IsSplitCSR(false) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 68f9aec8cae5..f71497240ff3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -15,7 +15,6 @@ #define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 30baf4263c11..73dcb9641b61 100644 --- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -27,6 +27,11 @@ public: bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + 
MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "optimise barriers pass"; } @@ -46,6 +51,9 @@ static bool CanMovePastDMB(const MachineInstr *MI) { } bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + // Vector to store the DMBs we will remove after the first iteration std::vector<MachineInstr *> ToRemove; // DMBType is the Imm value of the first operand. It determines whether it's a diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 528c4ec73781..47a99313025c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -94,7 +94,7 @@ def : PredicateProlog<[{ (void)TII; }]>; -def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>; +def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM @@ -186,38 +186,50 @@ def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; +def IIC_fpUNA16 : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; def IIC_fpUNA64 : InstrItinClass; +def IIC_fpCMP16 : InstrItinClass; def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; def IIC_fpCVTSH : InstrItinClass; def IIC_fpCVTHS : InstrItinClass; +def IIC_fpCVTIH : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; +def IIC_fpCVTHI : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; def IIC_fpMOVIS : InstrItinClass; def IIC_fpMOVID : InstrItinClass; def IIC_fpMOVSI : InstrItinClass; def IIC_fpMOVDI : InstrItinClass; +def IIC_fpALU16 : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; +def IIC_fpMUL16 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; +def IIC_fpMAC16 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC16 : InstrItinClass; def IIC_fpFMAC32 : InstrItinClass; def IIC_fpFMAC64 : InstrItinClass; +def IIC_fpDIV16 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; +def IIC_fpSQRT16 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; +def IIC_fpLoad16 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; def IIC_fpLoad_m : InstrItinClass; def IIC_fpLoad_mu : InstrItinClass; +def IIC_fpStore16 : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; def IIC_fpStore_m : InstrItinClass; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td index 2c6382542ab9..ba380cba100f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td @@ -1065,11 +1065,11 @@ def CortexA8Itineraries : ProcessorItineraries< // Cortex-A8 machine model for scheduling and other instruction cost heuristics. def CortexA8Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 2; // Optimistic load latency assuming bypass. 
// This is overriden by OperandCycles if the // Itineraries are queried instead. let MispredictPenalty = 13; // Based on estimate of pipeline depth. + let CompleteModel = 0; let Itineraries = CortexA8Itineraries; } diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 9a1d22275646..519e595bd184 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -2025,12 +2025,12 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; // Define a predicate to select the LDM based on number of memory addresses. def A9LMAdr#NumAddr#Pred : - SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; + SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>; } // foreach NumAddr // Fall-back for unknown LDMs. -def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">; +def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">; // LDM/VLDM/VLDn address generation latency & resources. // Dynamically select the A9WriteAdrN sequence using a predicate. diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index 3ad7730228e5..ea2bf4b578f0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -374,7 +374,7 @@ let SchedModel = SwiftModel in { } // Predicate. foreach NumAddr = 1-16 in { - def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NumAddr>; } def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; } def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 6fded9c8ab73..3b99762f7157 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -21,12 +21,9 @@ using namespace llvm; // Emit, if possible, a specialized version of the given Libcall. Typically this // means selecting the appropriately aligned version, but we also convert memset // of 0 into memclr. 
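As the EmitSpecializedLibcall comment above says, the lowering picks an alignment-specialised __aeabi_* entry point and rewrites a memset of zero as memclr. A simplified sketch of that choice using the RTABI names; the real code also dispatches memcpy and memmove and adjusts memset's argument order per RTABI section 4.3.4:

    // Sketch of the entry-point selection; Align is a power of two.
    static const char *pickMemsetLibcall(bool ValueIsZero, unsigned Align) {
      static const char *const Memset[] = {"__aeabi_memset", "__aeabi_memset4",
                                           "__aeabi_memset8"};
      static const char *const Memclr[] = {"__aeabi_memclr", "__aeabi_memclr4",
                                           "__aeabi_memclr8"};
      unsigned AlignVariant = (Align & 7) == 0 ? 2 : (Align & 3) == 0 ? 1 : 0;
      return ValueIsZero ? Memclr[AlignVariant] : Memset[AlignVariant];
    }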
-SDValue ARMSelectionDAGInfo:: -EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - RTLIB::Libcall LC) const { +SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, RTLIB::Libcall LC) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); @@ -121,21 +118,17 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], TLI->getPointerTy(DAG.getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue -ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { +SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. @@ -176,6 +169,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, // emit. unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; + // Code size optimisation: do not inline memcpy if expansion results in + // more instructions than the libary call. 
+ if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { + return SDValue(); + } + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); for (unsigned I = 0; I != NumMEMCPYs; ++I) { @@ -213,8 +212,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, dl, MVT::i32)), - SrcPtrInfo.getWithOffset(SrcOff), - false, false, false, 0); + SrcPtrInfo.getWithOffset(SrcOff)); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -237,7 +235,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, dl, MVT::i32)), - DstPtrInfo.getWithOffset(DstOff), false, false, 0); + DstPtrInfo.getWithOffset(DstOff)); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -246,26 +244,18 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, makeArrayRef(TFOps, i)); } - -SDValue ARMSelectionDAGInfo:: -EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { +SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMMOVE); } - -SDValue ARMSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMSET); } diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 289879ee1d7e..2ddb42c95397 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ARM subclass for TargetSelectionDAGInfo. +// This file defines the ARM subclass for SelectionDAGTargetInfo. 
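The memcpy hunk above adds a code-size heuristic: the inline expansion is emitted as runs of load/store-multiple, and when more than one run would be needed under minsize the library call is smaller, so the lowering now returns an empty SDValue and lets the generic code emit a call to memcpy. Roughly, with the per-run register cap taken from the surrounding code and assumed to be 6 here:

    // Sketch of the new bail-out in EmitTargetCodeForMemcpy.
    static bool preferMemcpyLibcall(unsigned NumMemOps, bool OptForMinSize) {
      const unsigned MaxLoadsInLDM = 6;                        // assumed cap
      unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
      return OptForMinSize && NumMEMCPYs > 1;
    }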
// //===----------------------------------------------------------------------===// @@ -15,7 +15,8 @@ #define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" namespace llvm { @@ -35,35 +36,30 @@ namespace ARM_AM { } } // end namespace ARM_AM -class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { +class ARMSelectionDAGInfo : public SelectionDAGTargetInfo { public: - - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; - SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; // Adjust parameters for memset, see RTABI section 4.3.4 - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, - bool isVolatile, + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; - SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, + SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const; }; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index bb6ae28065bd..1d7eef9ddcfd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -88,10 +88,9 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) - : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU), - IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), - FrameLowering(initializeFrameLowering(CPU, FS)), + : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), + CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), + TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. 
InstrInfo(isThumb1Only() @@ -102,63 +101,10 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, TLInfo(TM, *this) {} void ARMSubtarget::initializeEnvironment() { - HasV4TOps = false; - HasV5TOps = false; - HasV5TEOps = false; - HasV6Ops = false; - HasV6MOps = false; - HasV6KOps = false; - HasV6T2Ops = false; - HasV7Ops = false; - HasV8Ops = false; - HasV8_1aOps = false; - HasV8_2aOps = false; - HasVFPv2 = false; - HasVFPv3 = false; - HasVFPv4 = false; - HasFPARMv8 = false; - HasNEON = false; - UseNEONForSinglePrecisionFP = false; - UseMulOps = UseFusedMulOps; - SlowFPVMLx = false; - HasVMLxForwarding = false; - SlowFPBrcc = false; - InThumbMode = false; - UseSoftFloat = false; - HasThumb2 = false; - NoARM = false; - ReserveR9 = false; - NoMovt = false; - SupportsTailCall = false; - HasFP16 = false; - HasFullFP16 = false; - HasD16 = false; - HasHardwareDivide = false; - HasHardwareDivideInARM = false; - HasT2ExtractPack = false; - HasDataBarrier = false; - Pref32BitThumb = false; - AvoidCPSRPartialUpdate = false; - AvoidMOVsShifterOperand = false; - HasRAS = false; - HasMPExtension = false; - HasVirtualization = false; - FPOnlySP = false; - HasPerfMon = false; - HasTrustZone = false; - HasCrypto = false; - HasCRC = false; - HasZeroCycleZeroing = false; - StrictAlign = false; - HasDSP = false; - UseNaClTrap = false; - GenLongCalls = false; - UnsafeFPMath = false; - // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this // directly from it, but we can try to make sure they're consistent when both // available. - UseSjLjEH = isTargetDarwin() && !isTargetWatchOS(); + UseSjLjEH = isTargetDarwin() && !isTargetWatchABI(); assert((!TM.getMCAsmInfo() || (TM.getMCAsmInfo()->getExceptionHandlingType() == ExceptionHandling::SjLj) == UseSjLjEH) && @@ -230,7 +176,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // registers are the 4 used for parameters. We don't currently do this // case. - SupportsTailCall = !isThumb1Only(); + SupportsTailCall = !isThumb() || hasV8MBaselineOps(); if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0)) SupportsTailCall = false; @@ -252,6 +198,53 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; + + // FIXME: Teach TableGen to deal with these instead of doing it manually here. 
+ switch (ARMProcFamily) { + case Others: + case CortexA5: + break; + case CortexA7: + LdStMultipleTiming = DoubleIssue; + break; + case CortexA8: + LdStMultipleTiming = DoubleIssue; + break; + case CortexA9: + LdStMultipleTiming = DoubleIssueCheckUnalignedAccess; + PreISelOperandLatencyAdjustment = 1; + break; + case CortexA12: + break; + case CortexA15: + MaxInterleaveFactor = 2; + PreISelOperandLatencyAdjustment = 1; + PartialUpdateClearance = 12; + break; + case CortexA17: + case CortexA32: + case CortexA35: + case CortexA53: + case CortexA57: + case CortexA72: + case CortexA73: + case CortexR4: + case CortexR4F: + case CortexR5: + case CortexR7: + case CortexM3: + case ExynosM1: + break; + case Krait: + PreISelOperandLatencyAdjustment = 1; + break; + case Swift: + MaxInterleaveFactor = 2; + LdStMultipleTiming = SingleIssuePlusExtras; + PreISelOperandLatencyAdjustment = 1; + PartialUpdateClearance = 12; + break; + } } bool ARMSubtarget::isAPCS_ABI() const { @@ -268,40 +261,16 @@ bool ARMSubtarget::isAAPCS16_ABI() const { return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } +bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return true; -/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. -bool -ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, - Reloc::Model RelocM) const { - if (RelocM == Reloc::Static) - return false; - - bool isDef = GV->isStrongDefinitionForLinker(); - - if (!isTargetMachO()) { - // Extra load is needed for all externally visible. - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return false; + // 32 bit macho has no relocation for a-b if a is undefined, even if b is in + // the section that is being relocated. This means we have to use o load even + // for GVs that are known to be local to the dso. + if (isTargetDarwin() && TM.isPositionIndependent() && + (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) return true; - } else { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (isDef) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; - - if (RelocM == Reloc::PIC_) { - // If symbol visibility is hidden, we have a stub for common symbol - // references and external declarations. - if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) - // Hidden $non_lazy_ptr reference. - return true; - } - } return false; } @@ -332,21 +301,21 @@ bool ARMSubtarget::enablePostRAScheduler() const { } bool ARMSubtarget::enableAtomicExpand() const { - return hasAnyDataBarrier() && !isThumb1Only(); + return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps()); } bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind // format which it's more important to get right. 
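The switch over ARMProcFamily above gathers per-CPU tuning into a handful of fields set in one place instead of scattered CPU predicates. A standalone sketch of the same shape, with illustrative names rather than LLVM's types; the Cortex-A9, Cortex-A15 and Swift values are the ones in the switch in this patch:

// Illustrative stand-ins for the subtarget's tuning knobs; names are not LLVM's.
enum class ProcFamily { Generic, CortexA9, CortexA15, Swift };
enum class LdStTiming { SingleIssue, DoubleIssue, DoubleIssueCheckUnaligned, SingleIssuePlusExtras };

struct TuningParams {
  unsigned MaxInterleaveFactor = 1;
  unsigned PartialUpdateClearance = 0;
  int PreISelOperandLatencyAdjustment = 2;
  LdStTiming LdStMultiple = LdStTiming::SingleIssue;
};

// One switch per CPU family keeps the tuning decisions together rather than
// answering "is this Swift?" style questions throughout the backend.
TuningParams tuningFor(ProcFamily F) {
  TuningParams P;
  switch (F) {
  case ProcFamily::CortexA9:
    P.LdStMultiple = LdStTiming::DoubleIssueCheckUnaligned;
    P.PreISelOperandLatencyAdjustment = 1;
    break;
  case ProcFamily::CortexA15:
    P.MaxInterleaveFactor = 2;
    P.PreISelOperandLatencyAdjustment = 1;
    P.PartialUpdateClearance = 12;
    break;
  case ProcFamily::Swift:
    P.MaxInterleaveFactor = 2;
    P.LdStMultiple = LdStTiming::SingleIssuePlusExtras;
    P.PreISelOperandLatencyAdjustment = 1;
    P.PartialUpdateClearance = 12;
    break;
  case ProcFamily::Generic:
    break;
  }
  return P;
}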
- return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize()); + return isTargetWatchABI() || (isSwift() && !MF.getFunction()->optForMinSize()); } bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of // range otherwise. - return !NoMovt && hasV6T2Ops() && + return !NoMovt && hasV8MBaselineOps() && (isTargetWindows() || !MF.getFunction()->optForMinSize()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 4d54e5751473..910de0e1e72d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -43,8 +43,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53, - CortexA57, CortexA72, Krait, Swift, ExynosM1 + CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexM3, + CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73, + Krait, Swift, ExynosM1 }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -52,188 +53,275 @@ protected: enum ARMArchEnum { ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, - ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a + ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline }; +public: + /// What kind of timing do load multiple/store multiple instructions have. + enum ARMLdStMultipleTiming { + /// Can load/store 2 registers/cycle. + DoubleIssue, + /// Can load/store 2 registers/cycle, but needs an extra cycle if the access + /// is not 64-bit aligned. + DoubleIssueCheckUnalignedAccess, + /// Can load/store 1 register/cycle. + SingleIssue, + /// Can load/store 1 register/cycle, but needs an extra cycle for address + /// computation and potentially also for register writeback. + SingleIssuePlusExtras, + }; + +protected: /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. - ARMProcFamilyEnum ARMProcFamily; + ARMProcFamilyEnum ARMProcFamily = Others; /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. - ARMProcClassEnum ARMProcClass; + ARMProcClassEnum ARMProcClass = None; /// ARMArch - ARM architecture - ARMArchEnum ARMArch; + ARMArchEnum ARMArch = ARMv4t; /// HasV4TOps, HasV5TOps, HasV5TEOps, /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. - bool HasV4TOps; - bool HasV5TOps; - bool HasV5TEOps; - bool HasV6Ops; - bool HasV6MOps; - bool HasV6KOps; - bool HasV6T2Ops; - bool HasV7Ops; - bool HasV8Ops; - bool HasV8_1aOps; - bool HasV8_2aOps; + bool HasV4TOps = false; + bool HasV5TOps = false; + bool HasV5TEOps = false; + bool HasV6Ops = false; + bool HasV6MOps = false; + bool HasV6KOps = false; + bool HasV6T2Ops = false; + bool HasV7Ops = false; + bool HasV8Ops = false; + bool HasV8_1aOps = false; + bool HasV8_2aOps = false; + bool HasV8MBaselineOps = false; + bool HasV8MMainlineOps = false; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. 
- bool HasVFPv2; - bool HasVFPv3; - bool HasVFPv4; - bool HasFPARMv8; - bool HasNEON; + bool HasVFPv2 = false; + bool HasVFPv3 = false; + bool HasVFPv4 = false; + bool HasFPARMv8 = false; + bool HasNEON = false; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. - bool UseNEONForSinglePrecisionFP; + bool UseNEONForSinglePrecisionFP = false; /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. - bool UseMulOps; + bool UseMulOps = false; /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates /// whether the FP VML[AS] instructions are slow (if so, don't use them). - bool SlowFPVMLx; + bool SlowFPVMLx = false; /// HasVMLxForwarding - If true, NEON has special multiplier accumulator /// forwarding to allow mul + mla being issued back to back. - bool HasVMLxForwarding; + bool HasVMLxForwarding = false; /// SlowFPBrcc - True if floating point compare + branch is slow. - bool SlowFPBrcc; + bool SlowFPBrcc = false; /// InThumbMode - True if compiling for Thumb, false for ARM. - bool InThumbMode; + bool InThumbMode = false; /// UseSoftFloat - True if we're using software floating point features. - bool UseSoftFloat; + bool UseSoftFloat = false; /// HasThumb2 - True if Thumb2 instructions are supported. - bool HasThumb2; + bool HasThumb2 = false; /// NoARM - True if subtarget does not support ARM mode execution. - bool NoARM; + bool NoARM = false; /// ReserveR9 - True if R9 is not available as a general purpose register. - bool ReserveR9; + bool ReserveR9 = false; /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of /// 32-bit imms (including global addresses). - bool NoMovt; + bool NoMovt = false; /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. - bool SupportsTailCall; + bool SupportsTailCall = false; /// HasFP16 - True if subtarget supports half-precision FP conversions - bool HasFP16; + bool HasFP16 = false; /// HasFullFP16 - True if subtarget supports half-precision FP operations - bool HasFullFP16; + bool HasFullFP16 = false; /// HasD16 - True if subtarget is limited to 16 double precision /// FP registers for VFPv3. - bool HasD16; + bool HasD16 = false; /// HasHardwareDivide - True if subtarget supports [su]div - bool HasHardwareDivide; + bool HasHardwareDivide = false; /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode - bool HasHardwareDivideInARM; + bool HasHardwareDivideInARM = false; /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack /// instructions. - bool HasT2ExtractPack; + bool HasT2ExtractPack = false; /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier /// instructions. - bool HasDataBarrier; + bool HasDataBarrier = false; + + /// HasV7Clrex - True if the subtarget supports CLREX instructions + bool HasV7Clrex = false; + + /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc) + /// instructions + bool HasAcquireRelease = false; /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions /// over 16-bit ones. 
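The hunks in this header turn bare member declarations into in-class default member initializers, which is what lets the patch delete the body of initializeEnvironment() earlier in ARMSubtarget.cpp. A generic before/after sketch of the pattern (illustrative class, not LLVM's):

// Before: defaults live far away from the declarations and must be kept in sync.
struct FeaturesOld {
  bool HasNEON;
  bool SlowFPVMLx;
  FeaturesOld() { reset(); }
  void reset() {
    HasNEON = false;
    SlowFPVMLx = false;
  }
};

// After: each flag carries its default at the point of declaration, so a new
// flag cannot be forgotten in a separate initialization routine.
struct FeaturesNew {
  bool HasNEON = false;
  bool SlowFPVMLx = false;
};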
- bool Pref32BitThumb; + bool Pref32BitThumb = false; /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions /// that partially update CPSR and add false dependency on the previous /// CPSR setting instruction. - bool AvoidCPSRPartialUpdate; + bool AvoidCPSRPartialUpdate = false; /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting /// movs with shifter operand (i.e. asr, lsl, lsr). - bool AvoidMOVsShifterOperand; + bool AvoidMOVsShifterOperand = false; - /// HasRAS - Some processors perform return stack prediction. CodeGen should + /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should /// avoid issue "normal" call instructions to callees which do not return. - bool HasRAS; + bool HasRetAddrStack = false; /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). - bool HasMPExtension; + bool HasMPExtension = false; /// HasVirtualization - True if the subtarget supports the Virtualization /// extension. - bool HasVirtualization; + bool HasVirtualization = false; /// FPOnlySP - If true, the floating point unit only supports single /// precision. - bool FPOnlySP; + bool FPOnlySP = false; /// If true, the processor supports the Performance Monitor Extensions. These /// include a generic cycle-counter as well as more fine-grained (often /// implementation-specific) events. - bool HasPerfMon; + bool HasPerfMon = false; /// HasTrustZone - if true, processor supports TrustZone security extensions - bool HasTrustZone; + bool HasTrustZone = false; + + /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions + bool Has8MSecExt = false; /// HasCrypto - if true, processor supports Cryptography extensions - bool HasCrypto; + bool HasCrypto = false; /// HasCRC - if true, processor supports CRC instructions - bool HasCRC; + bool HasCRC = false; + + /// HasRAS - if true, the processor supports RAS extensions + bool HasRAS = false; /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. - bool HasZeroCycleZeroing; + bool HasZeroCycleZeroing = false; + + /// If true, if conversion may decide to leave some instructions unpredicated. + bool IsProfitableToUnpredicate = false; + + /// If true, VMOV will be favored over VGETLNi32. + bool HasSlowVGETLNi32 = false; + + /// If true, VMOV will be favored over VDUP. + bool HasSlowVDUP32 = false; + + /// If true, VMOVSR will be favored over VMOVDRR. + bool PreferVMOVSR = false; + + /// If true, ISHST barriers will be used for Release semantics. + bool PreferISHST = false; + + /// If true, a VLDM/VSTM starting with an odd register number is considered to + /// take more microops than single VLDRS/VSTRS. + bool SlowOddRegister = false; + + /// If true, loading into a D subregister will be penalized. + bool SlowLoadDSubregister = false; + + /// If true, the AGU and NEON/FPU units are multiplexed. + bool HasMuxedUnits = false; + + /// If true, VMOVS will never be widened to VMOVD + bool DontWidenVMOVS = false; + + /// If true, run the MLx expansion pass. + bool ExpandMLx = false; + + /// If true, VFP/NEON VMLA/VMLS have special RAW hazards. + bool HasVMLxHazards = false; + + /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. + bool UseNEONForFPMovs = false; + + /// If true, VLDn instructions take an extra cycle for unaligned accesses. + bool CheckVLDnAlign = false; + + /// If true, VFP instructions are not pipelined. 
+ bool NonpipelinedVFP = false; /// StrictAlign - If true, the subtarget disallows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool StrictAlign; + bool StrictAlign = false; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT /// blocks to conform to ARMv8 rule. - bool RestrictIT; + bool RestrictIT = false; /// HasDSP - If true, the subtarget supports the DSP (saturating arith /// and such) instructions. - bool HasDSP; + bool HasDSP = false; /// NaCl TRAP instruction is generated instead of the regular TRAP. - bool UseNaClTrap; + bool UseNaClTrap = false; /// Generate calls via indirect call instructions. - bool GenLongCalls; + bool GenLongCalls = false; /// Target machine allowed unsafe FP math (such as use of NEON fp) - bool UnsafeFPMath; + bool UnsafeFPMath = false; /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). - bool UseSjLjEH; + bool UseSjLjEH = false; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment; + unsigned stackAlignment = 4; /// CPUString - String name of used CPU. std::string CPUString; + unsigned MaxInterleaveFactor = 1; + + /// Clearance before partial register updates (in number of instructions) + unsigned PartialUpdateClearance = 0; + + /// What kind of timing do load multiple/store multiple have (double issue, + /// single issue etc). + ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue; + + /// The adjustment that we need to apply to get the operand latency from the + /// operand cycle returned by the itinerary data for pre-ISel operands. + int PreISelOperandLatencyAdjustment = 2; + /// IsLittle - The target is Little Endian bool IsLittle; @@ -313,17 +401,23 @@ public: bool hasV8Ops() const { return HasV8Ops; } bool hasV8_1aOps() const { return HasV8_1aOps; } bool hasV8_2aOps() const { return HasV8_2aOps; } + bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } + bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } + /// @{ + /// These functions are obsolete, please consider adding subtarget features + /// or properties instead of calling them. 
bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } - bool isCortexM3() const { return CPUString == "cortex-m3"; } + bool isCortexM3() const { return ARMProcFamily == CortexM3; } bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool isKrait() const { return ARMProcFamily == Krait; } + /// @} bool hasARMOps() const { return !NoARM; } @@ -334,6 +428,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; @@ -343,6 +438,8 @@ public: bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool hasV7Clrex() const { return HasV7Clrex; } + bool hasAcquireRelease() const { return HasAcquireRelease; } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); } @@ -353,11 +450,26 @@ public: bool isFPOnlySP() const { return FPOnlySP; } bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } + bool has8MSecExt() const { return Has8MSecExt; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } + bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } + bool hasSlowVDUP32() const { return HasSlowVDUP32; } + bool preferVMOVSR() const { return PreferVMOVSR; } + bool preferISHSTBarriers() const { return PreferISHST; } + bool expandMLx() const { return ExpandMLx; } + bool hasVMLxHazards() const { return HasVMLxHazards; } + bool hasSlowOddRegister() const { return SlowOddRegister; } + bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } + bool hasMuxedUnits() const { return HasMuxedUnits; } + bool dontWidenVMOVS() const { return DontWidenVMOVS; } + bool useNEONForFPMovs() const { return UseNEONForFPMovs; } + bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } + bool nonpipelinedVFP() const { return NonpipelinedVFP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } - bool hasRAS() const { return HasRAS; } + bool hasRetAddrStack() const { return HasRetAddrStack; } bool hasMPExtension() const { return HasMPExtension; } bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } @@ -373,6 +485,7 @@ public: bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } + bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } 
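isCortexM3() above now tests the ARMProcFamily enum rather than comparing CPUString against "cortex-m3", so knowledge of CPU name spellings stays in the one place that parses them. A small sketch of that pattern (illustrative code, not LLVM's):

#include <string>

// Map the CPU name once, then answer family questions from the enum rather
// than re-comparing strings at every call site.
enum class Family { Unknown, CortexM3, CortexA9 };

Family parseFamily(const std::string &CPU) {
  if (CPU == "cortex-m3") return Family::CortexM3;
  if (CPU == "cortex-a9") return Family::CortexA9;
  return Family::Unknown;
}

struct Subtarget {
  Family Fam = Family::Unknown;
  explicit Subtarget(const std::string &CPU) : Fam(parseFamily(CPU)) {}
  // Cheap, and only parseFamily() needs to know how CPU names are spelled.
  bool isCortexM3() const { return Fam == Family::CortexM3; }
};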
@@ -399,14 +512,21 @@ public: TargetTriple.getEnvironment() == Triple::GNUEABIHF) && !isTargetDarwin() && !isTargetWindows(); } + bool isTargetMuslAEABI() const { + return (TargetTriple.getEnvironment() == Triple::MuslEABI || + TargetTriple.getEnvironment() == Triple::MuslEABIHF) && + !isTargetDarwin() && !isTargetWindows(); + } // ARM Targets that support EHABI exception handling standard // Darwin uses SjLj. Other targets might need more checks. bool isTargetEHABICompatible() const { return (TargetTriple.getEnvironment() == Triple::EABI || TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::MuslEABI || TargetTriple.getEnvironment() == Triple::EABIHF || TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABIHF || isTargetAndroid()) && !isTargetDarwin() && !isTargetWindows(); } @@ -414,6 +534,7 @@ public: bool isTargetHardFloat() const { // FIXME: this is invalid for WindowsCE return TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABIHF || TargetTriple.getEnvironment() == Triple::EABIHF || isTargetWindows() || isAAPCS16_ABI(); } @@ -436,6 +557,13 @@ public: return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } + /// Returns true if the frame setup is split into two separate pushes (first + /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent + /// to lr. + bool splitFramePushPop() const { + return isTargetMachO(); + } + bool useStride4VFPs(const MachineFunction &MF) const; bool useMovt(const MachineFunction &MF) const; @@ -476,9 +604,20 @@ public: /// function for this subtarget. unsigned getStackAlignment() const { return stackAlignment; } - /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect - /// symbol. - bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } + + unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; } + + ARMLdStMultipleTiming getLdStMultipleTiming() const { + return LdStMultipleTiming; + } + + int getPreISelOperandLatencyAdjustment() const { + return PreISelOperandLatencyAdjustment; + } + + /// True if the GV will be accessed via an indirect symbol. + bool isGVIndirectSymbol(const GlobalValue *GV) const; /// True if fast-isel is used. 
bool useFastISel() const; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index fca1901dc57c..dc730a675bef 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -16,6 +16,7 @@ #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCAsmInfo.h" @@ -53,6 +54,10 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget); RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget); RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); + + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeARMLoadStoreOptPass(Registry); + initializeARMPreAllocLoadStoreOptPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -85,7 +90,7 @@ computeTargetABI(const Triple &TT, StringRef CPU, (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || CPU.startswith("cortex-m")) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else if (TT.isWatchOS()) { + } else if (TT.isWatchABI()) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; } else { TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; @@ -99,6 +104,8 @@ computeTargetABI(const Triple &TT, StringRef CPU, case llvm::Triple::Android: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABI: + case llvm::Triple::MuslEABIHF: case llvm::Triple::EABIHF: case llvm::Triple::EABI: TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; @@ -171,15 +178,30 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, return Ret; } -/// TargetMachine ctor - Create an ARM architecture model. +static Reloc::Model getEffectiveRelocModel(const Triple &TT, + Optional<Reloc::Model> RM) { + if (!RM.hasValue()) + // Default relocation model on Darwin is PIC. + return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; + + // DynamicNoPIC is only used on darwin. + if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) + return Reloc::Static; + + return *RM; +} + +/// Create an ARM architecture model. 
/// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, - CPU, FS, Options, RM, CM, OL), + CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, + OL), TargetABI(computeTargetABI(TT, CPU, Options)), TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { @@ -192,7 +214,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, // Default to triple-appropriate EABI if (Options.EABIVersion == EABI::Default || Options.EABIVersion == EABI::Unknown) { - if (Subtarget.isTargetGNUAEABI()) + // musl is compatible with glibc with regard to EABI version + if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI()) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; @@ -219,7 +242,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = - F.hasFnAttribute("use-soft-float") && F.getFnAttribute("use-soft-float").getValueAsString() == "true"; // If the soft float attribute is set on the function turn on the soft float // subtarget feature. @@ -248,8 +270,9 @@ void ARMTargetMachine::anchor() {} ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle) + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL, + bool isLittle) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { initAsmInfo(); if (!Subtarget.hasARMOps()) @@ -262,7 +285,8 @@ void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} @@ -271,7 +295,8 @@ void ARMBETargetMachine::anchor() {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} @@ -280,7 +305,8 @@ void ThumbTargetMachine::anchor() {} ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { initAsmInfo(); @@ -291,7 +317,8 @@ void ThumbLETargetMachine::anchor() {} ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} @@ -300,7 +327,8 @@ void ThumbBETargetMachine::anchor() {} 
ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index 8ad1f3dc2c34..c6b70b953162 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -39,7 +39,7 @@ protected: public: ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); ~ARMBaseTargetMachine() override; @@ -58,39 +58,40 @@ public: } }; -/// ARMTargetMachine - ARM target machine. +/// ARM target machine. /// class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); public: ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); + StringRef FS, const TargetOptions &Options, + Optional<Reloc::Model> RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle); }; -/// ARMLETargetMachine - ARM little endian target machine. +/// ARM little endian target machine. /// class ARMLETargetMachine : public ARMTargetMachine { void anchor() override; public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; -/// ARMBETargetMachine - ARM big endian target machine. +/// ARM big endian target machine. /// class ARMBETargetMachine : public ARMTargetMachine { void anchor() override; public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; -/// ThumbTargetMachine - Thumb target machine. +/// Thumb target machine. /// Due to the way architectures are handled, this represents both /// Thumb-1 and Thumb-2. /// @@ -99,29 +100,29 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { public: ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle); + Optional<Reloc::Model> RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle); }; -/// ThumbLETargetMachine - Thumb little endian target machine. +/// Thumb little endian target machine. /// class ThumbLETargetMachine : public ThumbTargetMachine { void anchor() override; public: ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; -/// ThumbBETargetMachine - Thumb big endian target machine. +/// Thumb big endian target machine. 
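These constructors now take Optional<Reloc::Model>, with getEffectiveRelocModel (earlier in ARMTargetMachine.cpp) supplying the default when the front end did not specify one. A minimal sketch of that pattern using std::optional in place of llvm::Optional; the Darwin and DynamicNoPIC rules mirror the function in this patch:

#include <optional>

enum class RelocModel { Static, PIC, DynamicNoPIC };

// An unset model gets a triple-dependent default, and models the target cannot
// honour are legalised before being handed to the rest of the target machine.
RelocModel effectiveRelocModel(bool IsMachO, bool IsDarwin,
                               std::optional<RelocModel> RM) {
  if (!RM)
    return IsMachO ? RelocModel::PIC : RelocModel::Static; // Darwin defaults to PIC
  if (*RM == RelocModel::DynamicNoPIC && !IsDarwin)
    return RelocModel::Static; // DynamicNoPIC is only used on Darwin
  return *RM;
}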
/// class ThumbBETargetMachine : public ThumbTargetMachine { void anchor() override; public: ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 98e8763c4705..b1db201cb30d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -21,10 +21,10 @@ class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; public: - ARMElfTargetObjectFile() : - TargetLoweringObjectFileELF(), - AttributesSection(nullptr) - {} + ARMElfTargetObjectFile() + : TargetLoweringObjectFileELF(), AttributesSection(nullptr) { + PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31; + } void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index c1520119ef21..13c5dc61acd9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -18,12 +18,12 @@ using namespace llvm; int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); - unsigned Bits = Ty->getPrimitiveSizeInBits(); - if (Bits == 0 || Bits > 32) - return 4; + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Imm.getActiveBits() >= 64) + return 4; - int32_t SImmVal = Imm.getSExtValue(); - uint32_t ZImmVal = Imm.getZExtValue(); + int64_t SImmVal = Imm.getSExtValue(); + uint64_t ZImmVal = Imm.getZExtValue(); if (!ST->isThumb()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getSOImmVal(ZImmVal) != -1) || @@ -47,6 +47,32 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 3; } + +// Constants smaller than 256 fit in the immediate field of +// Thumb1 instructions so we return a zero cost and 1 otherwise. +int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { + if (Imm.isNonNegative() && Imm.getLimitedValue() < 256) + return 0; + + return 1; +} + +int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { + // Division by a constant can be turned into multiplication, but only if we + // know it's constant. So it's not so much that the immediate is cheap (it's + // not), but that the alternative is worse. + // FIXME: this is probably unneeded with GlobalISel. + if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv || + Opcode == Instruction::SRem || Opcode == Instruction::URem) && + Idx == 1) + return 0; + + return getIntImmCost(Imm, Ty); +} + + int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -244,10 +270,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) { // Penalize inserting into an D-subregister. We end up with a three times // lower estimated throughput on swift. 
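The new getIntImmCost overload above returns 0 when the constant is the divisor operand (Idx == 1) of a division or remainder: the immediate is not actually cheap to materialise, but keeping it visible as a constant lets the divide be strength-reduced to a multiply, which is the cheaper outcome overall. A sketch of that decision with simplified types (illustrative, not LLVM's API):

enum class Op { Add, SDiv, UDiv, SRem, URem };

// Cost the optimiser should charge for keeping a constant as an immediate
// operand of Opcode at operand index Idx.
int intImmCost(Op Opcode, unsigned Idx, int genericImmCost) {
  // A constant divisor enables divide-by-constant strength reduction, so never
  // discourage the optimiser from keeping it as a constant.
  bool IsDivisor = (Opcode == Op::SDiv || Opcode == Op::UDiv ||
                    Opcode == Op::SRem || Opcode == Op::URem) && Idx == 1;
  if (IsDivisor)
    return 0;
  return genericImmCost;
}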
- if (ST->isSwift() && - Opcode == Instruction::InsertElement && - ValTy->isVectorTy() && - ValTy->getScalarSizeInBits() <= 32) + if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement && + ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) return 3; if ((Opcode == Instruction::InsertElement || diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 7d8d2381c983..a0ca9e648002 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -54,12 +54,24 @@ public: bool enableInterleavedAccessVectorization() { return true; } + /// Floating-point computation using ARMv8 AArch32 Advanced + /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD + /// is IEEE-754 compliant, but it's not covered in this target. + bool isFPVectorizationPotentiallyUnsafe() { + return !ST->isTargetDarwin(); + } + /// \name Scalar TTI Implementations /// @{ + int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + /// @} /// \name Vector TTI Implementations @@ -88,10 +100,7 @@ public: } unsigned getMaxInterleaveFactor(unsigned VF) { - // These are out of order CPUs: - if (ST->isCortexA15() || ST->isSwift()) - return 2; - return 1; + return ST->getMaxInterleaveFactor(); } int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c69a741244cf..7d49302f9a96 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -20,7 +20,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -31,20 +31,20 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -257,9 +257,15 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasThumb() const { return getSTI().getFeatureBits()[ARM::HasV4TOps]; } + bool hasThumb2() const { + return getSTI().getFeatureBits()[ARM::FeatureThumb2]; + } bool hasV6Ops() const { return getSTI().getFeatureBits()[ARM::HasV6Ops]; } + bool hasV6T2Ops() const { + return getSTI().getFeatureBits()[ARM::HasV6T2Ops]; + } bool hasV6MOps() const { return getSTI().getFeatureBits()[ARM::HasV6MOps]; } @@ -269,6 +275,15 @@ class ARMAsmParser : public MCTargetAsmParser { bool 
hasV8Ops() const { return getSTI().getFeatureBits()[ARM::HasV8Ops]; } + bool hasV8MBaseline() const { + return getSTI().getFeatureBits()[ARM::HasV8MBaselineOps]; + } + bool hasV8MMainline() const { + return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps]; + } + bool has8MSecExt() const { + return getSTI().getFeatureBits()[ARM::Feature8MSecExt]; + } bool hasARM() const { return !getSTI().getFeatureBits()[ARM::FeatureNoARM]; } @@ -281,12 +296,16 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasV8_1aOps() const { return getSTI().getFeatureBits()[ARM::HasV8_1aOps]; } + bool hasRAS() const { + return getSTI().getFeatureBits()[ARM::FeatureRAS]; + } void SwitchMode() { MCSubtargetInfo &STI = copySTI(); uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } + void FixModeAfterArchChange(bool WasThumb, SMLoc Loc); bool isMClass() const { return getSTI().getFeatureBits()[ARM::FeatureMClass]; } @@ -417,8 +436,9 @@ class ARMOperand : public MCParsedAsmOperand { k_ShifterImmediate, k_RotateImmediate, k_ModifiedImmediate, + k_ConstantPoolImmediate, k_BitfieldDescriptor, - k_Token + k_Token, } Kind; SMLoc StartLoc, EndLoc, AlignmentLoc; @@ -611,6 +631,11 @@ public: return Imm.Val; } + const MCExpr *getConstantPoolImm() const { + assert(isConstantPoolImm() && "Invalid access!"); + return Imm.Val; + } + unsigned getVectorIndex() const { assert(Kind == k_VectorIndex && "Invalid access!"); return VectorIndex.Val; @@ -648,7 +673,27 @@ public: bool isCCOut() const { return Kind == k_CCOut; } bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } - bool isImm() const override { return Kind == k_Immediate; } + bool isImm() const override { + return Kind == k_Immediate; + } + + bool isARMBranchTarget() const { + if (!isImm()) return false; + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) + return CE->getValue() % 4 == 0; + return true; + } + + + bool isThumbBranchTarget() const { + if (!isImm()) return false; + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) + return CE->getValue() % 2 == 0; + return true; + } + // checks whether this operand is an unsigned offset which fits is a field // of specified width and scaled by a specific number of bits template<unsigned width, unsigned scale> @@ -1036,6 +1081,7 @@ public: return ARM_AM::getSOImmVal(Value) == -1 && ARM_AM::getSOImmVal(-Value) != -1; } + bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { @@ -1183,6 +1229,20 @@ public: return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || Val == INT32_MIN; } + bool isAddrMode5FP16() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; + if (!isMem() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return false; + // Immediate offset in range [-510, 510] and a multiple of 2. 
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || Val == INT32_MIN; + } bool isMemTBB() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) @@ -1203,7 +1263,7 @@ public: } bool isT2MemRegOffset() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || - Memory.Alignment != 0) + Memory.Alignment != 0 || Memory.BaseRegNum == ARM::PC) return false; // Only lsl #{0, 1, 2, 3} allowed. if (Memory.ShiftType == ARM_AM::no_shift) @@ -1319,6 +1379,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; @@ -1329,6 +1390,11 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); } + bool isConstPoolAsmImm() const { + // Delay processing of Constant Pool Immediate, this will turn into + // a constant. Match no other operand + return (isConstantPoolImm()); + } bool isPostIdxImm8() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1665,7 +1731,7 @@ public: if (!CE) return false; uint64_t Value = CE->getValue(); // i64 value with each byte being either 0 or 0xff. - for (unsigned i = 0; i < 8; ++i) + for (unsigned i = 0; i < 8; ++i, Value >>= 8) if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false; return true; } @@ -1680,6 +1746,16 @@ public: Inst.addOperand(MCOperand::createExpr(Expr)); } + void addARMBranchTargetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addThumbBranchTargetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addCondCodeOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(unsigned(getCondCode()))); @@ -1941,6 +2017,7 @@ public: } const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); + assert(SR && "Unknown value type!"); Inst.addOperand(MCOperand::createExpr(SR)); return; @@ -2145,6 +2222,28 @@ public: Inst.addOperand(MCOperand::createImm(Val)); } + void addAddrMode5FP16Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::createExpr(getImm())); + Inst.addOperand(MCOperand::createImm(0)); + return; + } + + // The lower bit is always zero and as such is not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 2 : 0; + ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM5FP16Opc(AddSub, Val); + Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::createImm(Val)); + } + void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If we have an immediate that's not a constant, treat it as a label @@ -2214,6 +2313,14 @@ public: Inst.addOperand(MCOperand::createImm(Val)); } + void addConstPoolAsmImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // This is container for the immediate that we will create the constant + // pool from + addExpr(Inst, getConstantPoolImm()); + return; + } + void addMemTBBOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); @@ -2594,6 +2701,15 @@ public: } static std::unique_ptr<ARMOperand> + CreateConstantPoolImm(const MCExpr *Val, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ConstantPoolImmediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<ARMOperand> CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor); Op->Bitfield.LSB = LSB; @@ -2850,6 +2966,9 @@ void ARMOperand::print(raw_ostream &OS) const { OS << "<mod_imm #" << ModImm.Bits << ", #" << ModImm.Rot << ")>"; break; + case k_ConstantPoolImmediate: + OS << "<constant_pool_imm #" << *getConstantPoolImm(); + break; case k_BitfieldDescriptor: OS << "<bitfield " << "lsb: " << Bitfield.LSB << ", width: " << Bitfield.Width << ">"; @@ -3969,6 +4088,18 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { .Case("basepri_max", 0x812) .Case("faultmask", 0x813) .Case("control", 0x814) + .Case("msplim", 0x80a) + .Case("psplim", 0x80b) + .Case("msp_ns", 0x888) + .Case("psp_ns", 0x889) + .Case("msplim_ns", 0x88a) + .Case("psplim_ns", 0x88b) + .Case("primask_ns", 0x890) + .Case("basepri_ns", 0x891) + .Case("basepri_max_ns", 0x892) + .Case("faultmask_ns", 0x893) + .Case("control_ns", 0x894) + .Case("sp_ns", 0x898) .Default(~0U); if (FlagsVal == ~0U) @@ -3983,6 +4114,14 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; + if (!has8MSecExt() && (FlagsVal == 0x80a || FlagsVal == 0x80b || + (FlagsVal > 0x814 && FlagsVal < 0xc00))) + return MatchOperand_NoMatch; + + if (!hasV8MMainline() && (FlagsVal == 0x88a || FlagsVal == 0x88b || + (FlagsVal > 0x890 && FlagsVal <= 0x893))) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat identifier token. 
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); return MatchOperand_Success; @@ -4673,14 +4812,14 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, // classify tB as either t2B or t1B based on range of immediate operand case ARM::tB: { ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); - if (!op.isSignedOffset<11, 1>() && isThumbTwo()) + if (!op.isSignedOffset<11, 1>() && isThumb() && hasV8MBaseline()) Inst.setOpcode(ARM::t2B); break; } // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand case ARM::tBcc: { ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); - if (!op.isSignedOffset<8, 1>() && isThumbTwo()) + if (!op.isSignedOffset<8, 1>() && isThumb() && hasV8MBaseline()) Inst.setOpcode(ARM::t2Bcc); break; } @@ -4973,7 +5112,8 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { // vmov.i{8|16|32|64} <dreg|qreg>, #imm ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]); bool isVmovf = TyOp.isToken() && - (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64"); + (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64" || + TyOp.getToken() == ".f16"); ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]); bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" || Mnemonic.getToken() == "fconsts"); @@ -5144,16 +5284,12 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { S = Parser.getTok().getLoc(); if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val) return Error(S, "unexpected token in operand"); - Parser.Lex(); // Eat '=' const MCExpr *SubExprVal; if (getParser().parseExpression(SubExprVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - - const MCExpr *CPLoc = - getTargetStreamer().addConstantPoolEntry(SubExprVal, S); - Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E)); + Operands.push_back(ARMOperand::CreateConstantPoolImm(SubExprVal, S, E)); return false; } } @@ -5265,7 +5401,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || - Mnemonic.startswith("vsel")) + Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || + Mnemonic == "bxns" || Mnemonic == "blxns") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5311,6 +5448,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" || + Mnemonic == "bxns" || Mnemonic == "blxns" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -5369,7 +5507,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || - (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { + (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || + Mnemonic == "vmovx" || Mnemonic == "vins") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -6405,6 +6544,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "immediate expression for mov requires :lower16: or :upper16"); break; } + case ARM::HINT: + case ARM::t2HINT: { + if (hasRAS()) { + // ESB is not predicable (pred must be AL) + unsigned Imm8 = Inst.getOperand(0).getImm(); + unsigned Pred = Inst.getOperand(1).getImm(); + if (Imm8 == 0x10 && Pred != ARMCC::AL) + return Error(Operands[1]->getStartLoc(), "instruction 'esb' is not " + "predicable, but condition " + "code specified"); + } + // Without the RAS extension, this behaves as any other unallocated hint. + break; + } } return false; @@ -6766,6 +6919,90 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, case ARM::t2LDRSHpcrel: Inst.setOpcode(ARM::t2LDRSHpci); return true; + case ARM::LDRConstPool: + case ARM::tLDRConstPool: + case ARM::t2LDRConstPool: { + // Pseudo instruction ldr rt, =immediate is converted to a + // MOV rt, immediate if immediate is known and representable + // otherwise we create a constant pool entry that we load from. 
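The comment above describes the ldr rX, =constant pseudo: emit a plain MOV/MVN (or MOVW where available) when the constant can be encoded, and fall back to a literal-pool load otherwise. A standalone sketch of the ARM-state half of that decision, with a simplified re-implementation of the modified-immediate test that ARM_AM::getSOImmVal performs; the real code also handles the Thumb encodings and the PC/SP restrictions:

#include <cstdint>
#include <string>

static uint32_t rotl32(uint32_t V, unsigned R) {
  R &= 31;
  return R == 0 ? V : ((V << R) | (V >> (32 - R)));
}

// True if V is an 8-bit value rotated right by an even amount, i.e. a valid
// ARM "modified immediate".
static bool isARMModifiedImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if (rotl32(V, Rot) <= 0xFF)
      return true;
  return false;
}

// Decide how "ldr rX, =Value" could be materialised in ARM state.
std::string chooseLoadImmStrategy(uint32_t Value, bool HasMOVW /* v6T2 or later */) {
  if (isARMModifiedImm(Value))
    return "mov";            // MOVi with a modified immediate
  if (isARMModifiedImm(~Value))
    return "mvn";            // MVNi with the complemented immediate
  if (HasMOVW && Value < 0x10000)
    return "movw";           // MOVi16, low 16 bits only
  return "literal pool";     // fall back to ldr from a constant pool
}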
+ MCInst TmpInst; + if (Inst.getOpcode() == ARM::LDRConstPool) + TmpInst.setOpcode(ARM::LDRi12); + else if (Inst.getOpcode() == ARM::tLDRConstPool) + TmpInst.setOpcode(ARM::tLDRpci); + else if (Inst.getOpcode() == ARM::t2LDRConstPool) + TmpInst.setOpcode(ARM::t2LDRpci); + const ARMOperand &PoolOperand = + static_cast<ARMOperand &>(*Operands[3]); + const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); + // If SubExprVal is a constant we may be able to use a MOV + if (isa<MCConstantExpr>(SubExprVal) && + Inst.getOperand(0).getReg() != ARM::PC && + Inst.getOperand(0).getReg() != ARM::SP) { + int64_t Value = + (int64_t) (cast<MCConstantExpr>(SubExprVal))->getValue(); + bool UseMov = true; + bool MovHasS = true; + if (Inst.getOpcode() == ARM::LDRConstPool) { + // ARM Constant + if (ARM_AM::getSOImmVal(Value) != -1) { + Value = ARM_AM::getSOImmVal(Value); + TmpInst.setOpcode(ARM::MOVi); + } + else if (ARM_AM::getSOImmVal(~Value) != -1) { + Value = ARM_AM::getSOImmVal(~Value); + TmpInst.setOpcode(ARM::MVNi); + } + else if (hasV6T2Ops() && + Value >=0 && Value < 65536) { + TmpInst.setOpcode(ARM::MOVi16); + MovHasS = false; + } + else + UseMov = false; + } + else { + // Thumb/Thumb2 Constant + if (hasThumb2() && + ARM_AM::getT2SOImmVal(Value) != -1) + TmpInst.setOpcode(ARM::t2MOVi); + else if (hasThumb2() && + ARM_AM::getT2SOImmVal(~Value) != -1) { + TmpInst.setOpcode(ARM::t2MVNi); + Value = ~Value; + } + else if (hasV8MBaseline() && + Value >=0 && Value < 65536) { + TmpInst.setOpcode(ARM::t2MOVi16); + MovHasS = false; + } + else + UseMov = false; + } + if (UseMov) { + TmpInst.addOperand(Inst.getOperand(0)); // Rt + TmpInst.addOperand(MCOperand::createImm(Value)); // Immediate + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + if (MovHasS) + TmpInst.addOperand(MCOperand::createReg(0)); // S + Inst = TmpInst; + return true; + } + } + // No opportunity to use MOV/MVN create constant pool + const MCExpr *CPLoc = + getTargetStreamer().addConstantPoolEntry(SubExprVal, + PoolOperand.getStartLoc()); + TmpInst.addOperand(Inst.getOperand(0)); // Rt + TmpInst.addOperand(MCOperand::createExpr(CPLoc)); // offset to constpool + if (TmpInst.getOpcode() == ARM::LDRi12) + TmpInst.addOperand(MCOperand::createImm(0)); // unused offset + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + Inst = TmpInst; + return true; + } // Handle NEON VST complex aliases. case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_16: @@ -9031,6 +9268,31 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { return false; } +// After changing arch/CPU, try to put the ARM/Thumb mode back to what it was +// before, if supported by the new target, or emit mapping symbols for the mode +// switch. +void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) { + if (WasThumb != isThumb()) { + if (WasThumb && hasThumb()) { + // Stay in Thumb mode + SwitchMode(); + } else if (!WasThumb && hasARM()) { + // Stay in ARM mode + SwitchMode(); + } else { + // Mode switch forced, because the new arch doesn't support the old mode. + getParser().getStreamer().EmitAssemblerFlag(isThumb() ? MCAF_Code16 + : MCAF_Code32); + // Warn about the implcit mode switch. GAS does not switch modes here, + // but instead stays in the old mode, reporting an error on any following + // instructions as the mode does not exist on the target. + Warning(Loc, Twine("new target does not support ") + + (WasThumb ? 
"thumb" : "arm") + " mode, switching to " + + (!WasThumb ? "thumb" : "arm") + " mode"); + } + } +} + /// parseDirectiveArch /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { @@ -9043,10 +9305,12 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { return false; } + bool WasThumb = isThumb(); Triple T; MCSubtargetInfo &STI = copySTI(); STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str()); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + FixModeAfterArchChange(WasThumb, L); getTargetStreamer().emitArch(ID); return false; @@ -9177,9 +9441,11 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } + bool WasThumb = isThumb(); MCSubtargetInfo &STI = copySTI(); STI.setDefaultFeatures(CPU, ""); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + FixModeAfterArchChange(WasThumb, L); return false; } @@ -9834,7 +10100,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { StringRef Arch = Parser.getTok().getString(); SMLoc ArchLoc = Parser.getTok().getLoc(); - getLexer().Lex(); + Lex(); unsigned ID = ARM::parseArch(Arch); @@ -9863,7 +10129,9 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { return true; // '.align' is target specifically handled to mean 2**2 byte alignment. - if (getStreamer().getCurrentSection().first->UseCodeAlign()) + const MCSection *Section = getStreamer().getCurrentSection().first; + assert(Section && "must have section to emit alignment"); + if (Section->UseCodeAlign()) getStreamer().EmitCodeAlignment(4, 0); else getStreamer().EmitValueToAlignment(4, 0, 1, 0); @@ -9933,6 +10201,7 @@ static const struct { // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} }, { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} }, + { ARM::AEK_RAS, Feature_HasV8, {ARM::FeatureRAS} }, // FIXME: Unsupported extensions. 
{ ARM::AEK_OS, Feature_None, {} }, { ARM::AEK_IWMMXT, Feature_None, {} }, @@ -9954,7 +10223,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); - getLexer().Lex(); + Lex(); bool EnableFeature = true; if (Name.startswith_lower("no")) { diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e63defed2288..3196a57ccc3e 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" @@ -210,6 +210,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, @@ -222,6 +224,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, @@ -391,8 +395,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); +static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" static MCDisassembler *createARMDisassembler(const Target &T, @@ -590,6 +594,8 @@ MCDisassembler::DecodeStatus ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { MCDisassembler::DecodeStatus S = Success; + const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits(); + // A few instructions actually have predicates encoded in them. Don't // try to overwrite it if we're seeing one of those. 
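The hint handling in the disassembler hunks that follow, like the parser check earlier in validateInstruction, singles out hint #16: with the RAS extension it is ESB, which must not be predicated, while without RAS it is just an unallocated hint and any condition code is acceptable. A compact sketch of that rule (illustrative helper, not LLVM's API):

// Models the check added for HINT/t2HINT: returns true when the encoding
// should be flagged (SoftFail or a diagnostic) because ESB carries a predicate.
bool isPredicatedESB(unsigned Imm8, unsigned PredField, bool HasRAS) {
  const unsigned PredAL = 0xE;  // "always" condition code
  const unsigned ESBImm = 0x10; // hint #16 is ESB when RAS is implemented
  return HasRAS && Imm8 == ESBImm && PredField != PredAL;
}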
switch (MI.getOpcode()) { @@ -610,6 +616,10 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { else return Success; break; + case ARM::t2HINT: + if (MI.getOperand(0).getImm() == 0x10 && (FeatureBits[ARM::FeatureRAS]) != 0) + S = SoftFail; + break; case ARM::tB: case ARM::t2B: case ARM::t2TBB: @@ -1941,6 +1951,29 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, return S; } +// Check for UNPREDICTABLE predicated ESB instruction +static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned imm8 = fieldFromInstruction(Insn, 0, 8); + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); + + DecodeStatus S = MCDisassembler::Success; + + Inst.addOperand(MCOperand::createImm(imm8)); + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + // ESB is unpredictable if pred != AL. Without the RAS extension, it is a NOP, + // so all predicates should be allowed. + if (imm8 == 0x10 && pred != 0xe && ((FeatureBits[ARM::FeatureRAS]) != 0)) + S = MCDisassembler::SoftFail; + + return S; +} + static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction(Insn, 18, 2); @@ -2183,6 +2216,7 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. unsigned U = fieldFromInstruction(Val, 8, 1); unsigned imm = fieldFromInstruction(Val, 0, 8); @@ -2197,6 +2231,26 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, return S; } +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. + unsigned U = fieldFromInstruction(Val, 8, 1); + unsigned imm = fieldFromInstruction(Val, 0, 8); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (U) + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::add, imm))); + else + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::sub, imm))); + + return S; +} + static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); @@ -4096,6 +4150,24 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, // Values basepri, basepri_max and faultmask are only valid for v7m. 
return MCDisassembler::Fail; break; + case 0x8a: // msplim_ns + case 0x8b: // psplim_ns + case 0x91: // basepri_ns + case 0x92: // basepri_max_ns + case 0x93: // faultmask_ns + if (!(FeatureBits[ARM::HasV8MMainlineOps])) + return MCDisassembler::Fail; + // fall through + case 10: // msplim + case 11: // psplim + case 0x88: // msp_ns + case 0x89: // psp_ns + case 0x90: // primask_ns + case 0x94: // control_ns + case 0x98: // sp_ns + if (!(FeatureBits[ARM::Feature8MSecExt])) + return MCDisassembler::Fail; + break; default: return MCDisassembler::Fail; } @@ -5193,8 +5265,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5210,12 +5282,30 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, if (Rt == Rt2) S = MCDisassembler::SoftFail; + // We have to check if the instruction is MRRC2 + // or MCRR2 when constructing the operands for + // Inst. This is because MRRC2 stores to two + // registers so its tablegen desc has two + // outputs whereas MCRR2 doesn't store to any + // registers so all of its operands are listed + // as inputs, therefore the operand order for + // MRRC2 needs to be [Rt, Rt2, cop, opc1, CRm] + // and MCRR2 operand order is [cop, opc1, Rt, Rt2, CRm] + + if (Inst.getOpcode() == ARM::MRRC2) { + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + } Inst.addOperand(MCOperand::createImm(cop)); Inst.addOperand(MCOperand::createImm(opc1)); - if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; + if (Inst.getOpcode() == ARM::MCRR2) { + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + } Inst.addOperand(MCOperand::createImm(CRm)); return S; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 33fc85af9b19..e81bb77dbdfc 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -25,6 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" +#define PRINT_ALIAS_INSTR #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -73,43 +74,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, switch (Opcode) { - // Check for HINT instructions w/ canonical names. - case ARM::HINT: - case ARM::tHINT: - case ARM::t2HINT: - switch (MI->getOperand(0).getImm()) { - case 0: - O << "\tnop"; - break; - case 1: - O << "\tyield"; - break; - case 2: - O << "\twfe"; - break; - case 3: - O << "\twfi"; - break; - case 4: - O << "\tsev"; - break; - case 5: - if (STI.getFeatureBits()[ARM::HasV8Ops]) { - O << "\tsevl"; - break; - } // Fallthrough for non-v8 - default: - // Anything else should just print normally.
- printInstruction(MI, STI, O); - printAnnotation(O, Annot); - return; - } - printPredicateOperand(MI, 1, STI, O); - if (Opcode == ARM::t2HINT) - O << ".w"; - printAnnotation(O, Annot); - return; - // Check for MOVs and print canonical forms, instead. case ARM::MOVsr: { // FIXME: Thumb variants? @@ -297,23 +261,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } break; } - // B9.3.3 ERET (Thumb) - // For a target that has Virtualization Extensions, ERET is the preferred - // disassembly of SUBS PC, LR, #0 - case ARM::t2SUBS_PC_LR: { - if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() && - MI->getOperand(0).getImm() == 0 && - STI.getFeatureBits()[ARM::FeatureVirtualization]) { - O << "\teret"; - printPredicateOperand(MI, 1, STI, O); - printAnnotation(O, Annot); - return; - } - break; - } } - printInstruction(MI, STI, O); + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); + printAnnotation(O, Annot); } @@ -645,6 +597,34 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template <bool AlwaysPrintImm0> +void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum, STI, O); + return; + } + + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + + unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm()); + unsigned Op = ARM_AM::getAM5FP16Op(MO2.getImm()); + if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { + O << ", " + << markup("<imm:") + << "#" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm())) + << ImmOffs * 2 + << markup(">"); + } + O << "]" << markup(">"); +} + void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -901,6 +881,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, case 20: O << "control"; return; + case 10: + O << "msplim"; + return; + case 11: + O << "psplim"; + return; + case 0x88: + O << "msp_ns"; + return; + case 0x89: + O << "psp_ns"; + return; + case 0x8a: + O << "msplim_ns"; + return; + case 0x8b: + O << "psplim_ns"; + return; + case 0x90: + O << "primask_ns"; + return; + case 0x91: + O << "basepri_ns"; + return; + case 0x92: + O << "basepri_max_ns"; + return; + case 0x93: + O << "faultmask_ns"; + return; + case 0x94: + O << "control_ns"; + return; + case 0x98: + O << "sp_ns"; + return; } } diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 52f7115f0558..9d80eed84dc2 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -30,6 +30,12 @@ public: // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, + raw_ostream &O); static const char *getRegisterName(unsigned RegNo); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -71,6 +77,9 @@ public: template <bool AlwaysPrintImm0> void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> + void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b03cada9a641..3959eab966a8 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -486,7 +486,7 @@ namespace ARM_AM { // addrmode5 := reg +/- imm8*4 // // The first operand is always a Reg. The second operand encodes the - // operation in bit 8 and the immediate in bits 0-7. + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -501,6 +501,29 @@ namespace ARM_AM { } //===--------------------------------------------------------------------===// + // Addressing Mode #5 FP16 + //===--------------------------------------------------------------------===// + // + // This is used for coprocessor instructions, such as 16-bit FP load/stores. + // + // addrmode5fp16 := reg +/- imm8*2 + // + // The first operand is always a Reg. The second operand encodes the + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. + + /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field. + static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { + return AM5Opc & 0xFF; + } + static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1) ? sub : add; + } + + //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// // @@ -650,6 +673,32 @@ namespace ARM_AM { return FPUnion.F; } + /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. + static inline int getFP16Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 + int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. 
+ if (Mantissa & 0x3f) + return -1; + Mantissa >>= 6; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP16Imm(const APFloat &FPImm) { + return getFP16Imm(FPImm.bitcastToAPInt()); + } + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index fa52c9354c17..0fc758201d47 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -46,6 +46,7 @@ public: : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} }; +} // end anonymous namespace const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { @@ -62,6 +63,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 0, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -78,7 +83,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel | @@ -90,6 +97,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_arm_movw_lo16", 0, 20, 0}, {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, + {"fixup_arm_mod_imm", 0, 12, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -105,6 +113,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 8, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -121,7 +133,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, 
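As an aside that is not part of the change itself: the ARM_AM::getFP16Imm helper added above packs a half-precision value into the VFP/NEON 8-bit immediate form, that is, 1 sign bit, a 3-bit exponent covering [-3, 4], and the top 4 mantissa bits. The following is a minimal standalone sketch of the same bit manipulation, using a raw uint16_t instead of LLVM's APInt/APFloat; the name encodeFP16Imm and the small main() driver are purely illustrative.

#include <cstdint>
#include <cstdio>

// Standalone restatement of the getFP16Imm logic: encode the raw bits of an
// IEEE half-precision value as the 8-bit immediate, or return -1 when the
// value needs more than 4 mantissa bits or an exponent outside [-3, 4].
static int encodeFP16Imm(uint16_t HalfBits) {
  unsigned Sign = (HalfBits >> 15) & 1;
  int Exp = (int)((HalfBits >> 10) & 0x1f) - 15; // unbiased exponent
  unsigned Mantissa = HalfBits & 0x3ff;          // 10 mantissa bits

  if (Mantissa & 0x3f)     // only the top four bits (e:f:g:h) may be set
    return -1;
  Mantissa >>= 6;

  if (Exp < -3 || Exp > 4) // exponent field is UInt(NOT(b):c:d) - 3
    return -1;
  unsigned ExpField = (unsigned)((Exp + 3) & 0x7) ^ 4;

  return (int)((Sign << 7) | (ExpField << 4) | Mantissa);
}

int main() {
  printf("%#x\n", (unsigned)encodeFP16Imm(0x3c00)); // 1.0 in IEEE half -> 0x70
  return 0;
}

The requirement that only the high four mantissa bits survive is what limits these immediates to values such as 0.5, 1.0, or 31.0, mirroring the existing getFP32Imm helper that follows.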
{"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel | @@ -133,6 +147,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_arm_movw_lo16", 12, 20, 0}, {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, + {"fixup_arm_mod_imm", 20, 12, 0}, }; if (Kind < FirstTargetFixupKind) @@ -155,10 +170,10 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { break; } } -} // end anonymous namespace unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2]; + bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps]; switch (Op) { default: @@ -170,7 +185,7 @@ unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { case ARM::tADR: return HasThumb2 ? (unsigned)ARM::t2ADR : Op; case ARM::tB: - return HasThumb2 ? (unsigned)ARM::t2B : Op; + return HasV8MBaselineOps ? (unsigned)ARM::t2B : Op; case ARM::tCBZ: return ARM::tHINT; case ARM::tCBNZ: @@ -243,7 +258,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, return reasonForFixupRelaxation(Fixup, Value); } -void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { +void ARMAsmBackend::relaxInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI, + MCInst &Res) const { unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); // Sanity check w/ diagnostic if we get here w/ a bogus instruction. @@ -449,7 +466,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Offset by 8 just as above. if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) - if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) + if (SRE->getKind() == MCSymbolRefExpr::VK_TLSCALL) return 0; return 0xffffff & ((Value - 8) >> 2); case ARM::fixup_t2_uncondbranch: { @@ -524,10 +541,15 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. - uint32_t offset = (Value - 2) >> 2; + if (Ctx && Value % 4 != 0) { + Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination"); + return 0; + } + + uint32_t offset = (Value - 4) >> 2; if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) - if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) + if (SRE->getKind() == MCSymbolRefExpr::VK_TLSCALL) offset = 0; uint32_t signBit = (offset & 0x400000) >> 22; uint32_t I1Bit = (offset & 0x200000) >> 21; @@ -563,7 +585,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && + !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); @@ -624,6 +647,44 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } + case ARM::fixup_arm_pcrel_9: + Value = Value - 4; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + // Fall through. + case ARM::fixup_t2_pcrel_9: { + // Offset by 4, adjusted by two due to the half-word ordering of thumb. 
+ Value = Value - 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // These values don't encode the low bit since it's always zero. + if (Ctx && (Value & 1)) { + Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup"); + return 0; + } + Value >>= 1; + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords + // swapped. + if (Kind == ARM::fixup_t2_pcrel_9) + return swapHalfWords(Value, IsLittleEndian); + + return Value; + } + case ARM::fixup_arm_mod_imm: + Value = ARM_AM::getSOImmVal(Value); + if (Ctx && Value >> 12) { + Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + return Value; } } @@ -690,11 +751,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case FK_Data_2: case ARM::fixup_arm_thumb_br: case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_mod_imm: return 2; case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_pcrel_9: case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: @@ -708,6 +771,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_t2_condbranch: case ARM::fixup_t2_uncondbranch: case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_pcrel_9: case ARM::fixup_t2_adr_pcrel_12: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: @@ -766,6 +830,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_arm_mod_imm: // Instruction size is 4 bytes. 
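Another aside, not part of the patch itself: the new fixup_arm_pcrel_9 and fixup_t2_pcrel_9 kinds handled above follow the same shape as the existing pcrel_10 handling, except that the offset is stored in halfwords rather than words, which matches the new addrmode5fp16 loads and stores. Below is a rough standalone sketch of the Thumb2 arithmetic, under the assumption of a 4-byte PC bias; the ARM variant subtracts a further 4 bytes, and the Thumb2 result is additionally halfword-swapped before being written out. The name encodePCRel9 and the worked number are illustrative only.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative restatement of the fixup_t2_pcrel_9 computation: the distance
// is measured from PC+4, must be halfword-aligned, is stored as an 8-bit
// halfword count, and bit 23 carries the add/subtract direction.
static uint32_t encodePCRel9(int64_t Distance) {
  int64_t Value = Distance - 4; // Thumb PC bias
  bool IsAdd = true;
  if (Value < 0) {
    Value = -Value;
    IsAdd = false;
  }
  assert((Value & 1) == 0 && "offset must be halfword-aligned");
  Value >>= 1;
  assert(Value < 256 && "offset out of range for an 8-bit field");
  return (uint32_t)Value | ((uint32_t)IsAdd << 23);
}

int main() {
  // A literal placed 100 bytes past the fixup: (100 - 4) / 2 = 48, add bit set.
  printf("%#x\n", (unsigned)encodePCRel9(100)); // prints 0x800030
  return 0;
}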
return 4; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 28a62132a419..84caaacc47d3 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -11,12 +11,12 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace { +namespace llvm { class ARMAsmBackend : public MCAsmBackend { const MCSubtargetInfo *STI; @@ -63,7 +63,8 @@ public: const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override; bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; @@ -74,6 +75,6 @@ public: void setIsThumb(bool it) { isThumbMode = it; } bool isLittle() const { return IsLittleEndian; } }; -} // end anonymous namespace +} // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 995dd0fe08ee..09dc0173ade6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -10,11 +10,10 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H +#include "ARMAsmBackend.h" #include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { +namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { const MCRegisterInfo &MRI; public: @@ -22,7 +21,6 @@ public: ARMAsmBackendDarwin(const Target &T, const Triple &TT, const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st) : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) { - HasDataInCodeSupport = true; } MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { @@ -33,6 +31,6 @@ public: uint32_t generateCompactUnwindEncoding( ArrayRef<MCCFIInstruction> Instrs) const override; }; -} +} // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 68b12edd089e..748f915be17b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -10,7 +10,10 @@ #ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H #define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H +#include "ARMAsmBackend.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" using namespace llvm; + namespace { class ARMAsmBackendELF : public ARMAsmBackend { public: diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 170f59a4c905..2a375be49a83 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H +#include "ARMAsmBackend.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h 
index 4289a73e9d6b..088b4205ed62 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -289,18 +289,20 @@ namespace ARMII { /// higher 16 bit of the address. Used only via movt instruction. MO_HI16 = 0x2, - /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a - /// call operand. - MO_PLT = 0x3, - /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. - MO_OPTION_MASK = 0x3f, + MO_OPTION_MASK = 0x1f, /// MO_DLLIMPORT - On a symbol operand, this represents that the reference /// to the symbol is for an import stub. This is used for DLL import /// storage class indication on Windows. - MO_DLLIMPORT = 0x40, + MO_DLLIMPORT = 0x20, + + /// MO_SECREL - On a symbol operand this indicates that the immediate is + /// the offset from beginning of section. + /// + /// This is the TLS offset for the COFF/Windows TLS mechanism. + MO_SECREL = 0x40, /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it /// represents a symbol which, if indirect, will get special Darwin mangling diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 52eba8be288f..4118fe8e8cdb 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -34,8 +34,8 @@ namespace { ~ARMELFObjectWriter() override; - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; @@ -67,7 +67,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, // Need to examine the Fixup when determining whether to // emit the relocation as an explicit symbol or as a section relative // offset -unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, +unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { return GetRelocTypeInner(Target, Fixup, IsPCRel); @@ -98,6 +98,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_GOT_PREL: Type = ELF::R_ARM_GOT_PREL; break; + case MCSymbolRefExpr::VK_ARM_PREL31: + Type = ELF::R_ARM_PREL31; + break; } break; case ARM::fixup_arm_blx: @@ -106,7 +109,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PLT: Type = ELF::R_ARM_CALL; break; - case MCSymbolRefExpr::VK_ARM_TLSCALL: + case MCSymbolRefExpr::VK_TLSCALL: Type = ELF::R_ARM_TLS_CALL; break; default: @@ -120,6 +123,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_t2_condbranch: + Type = ELF::R_ARM_THM_JUMP19; + break; case ARM::fixup_t2_uncondbranch: Type = ELF::R_ARM_THM_JUMP24; break; @@ -138,7 +143,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: switch (Modifier) { - case MCSymbolRefExpr::VK_ARM_TLSCALL: + case MCSymbolRefExpr::VK_TLSCALL: Type = ELF::R_ARM_THM_TLS_CALL; break; default: @@ -210,10 +215,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TLSLDO: Type = 
ELF::R_ARM_TLS_LDO32; break; - case MCSymbolRefExpr::VK_ARM_TLSCALL: + case MCSymbolRefExpr::VK_TLSCALL: Type = ELF::R_ARM_TLS_CALL; break; - case MCSymbolRefExpr::VK_ARM_TLSDESC: + case MCSymbolRefExpr::VK_TLSDESC: Type = ELF::R_ARM_TLS_GOTDESC; break; case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ: diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 57577dc834b7..36cb74765f3b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -763,6 +763,12 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(Virtualization_use, AllowTZVirtualization, false); break; + case ARM::AK_ARMV8MBaseline: + case ARM::AK_ARMV8MMainline: + setAttributeItem(THUMB_ISA_use, AllowThumbDerived, false); + setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); + break; + case ARM::AK_IWMMXT: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 46ba57170db5..3fe2302bdd37 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -33,6 +33,13 @@ enum Fixups { // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for // the short-swapped encoding of Thumb2 instructions. fixup_t2_pcrel_10, + // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses + // used in VFP instructions where bit 0 not encoded (so it's encoded as an + // 8-bit immediate). + fixup_arm_pcrel_9, + // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for + // the short-swapped encoding of Thumb2 instructions. + fixup_t2_pcrel_9, // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol // addresses where the lower 2 bits are not encoded (so it's encoded as an // 8-bit immediate). @@ -100,6 +107,9 @@ enum Fixups { fixup_t2_movt_hi16, // :upper16: fixup_t2_movw_lo16, // :lower16: + // fixup_arm_mod_imm - Fixup for mod_imm + fixup_arm_mod_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index bda37f6616a8..53cd29a6061e 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -13,7 +13,6 @@ #include "ARMMCAsmInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; @@ -33,7 +32,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS() + ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI()) ? 
ExceptionHandling::SjLj : ExceptionHandling::DwarfCFI; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index b88578309f08..9fca13eeea93 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -120,11 +120,11 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit + /// getThumbBranchTargetOpValue - Return encoding info for 24-bit /// immediate Thumb2 direct branch target. - uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + uint32_t getThumbBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate /// branch target. @@ -214,11 +214,6 @@ public: llvm_unreachable("Invalid ShiftOpc!"); } - /// getAddrMode2OpValue - Return encoding for addrmode2 operands. - uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands. uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, @@ -255,11 +250,16 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand. + /// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + /// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. + uint32_t getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + /// getCCOutOpValue - Return encoding of the 's' bit. unsigned getCCOutOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, @@ -312,12 +312,8 @@ public: // Support for fixups (MCFixup) if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); - // In instruction code this value always encoded as lowest 12 bits, - // so we don't have to perform any specific adjustments. - // Due to requirements of relocatable records we have to use FK_Data_4. - // See ARMELFObjectWriter::ExplicitRelSym and - // ARMELFObjectWriter::GetRelocTypeInner for more details. - MCFixupKind Kind = MCFixupKind(FK_Data_4); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_mod_imm); Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); return 0; } @@ -345,9 +341,6 @@ public: unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; /// getSORegOpValue - Return an encoded so_reg shifted register value. 
unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op, @@ -757,10 +750,9 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit /// immediate branch target. -uint32_t ARMMCCodeEmitter:: -getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { +uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { unsigned Val = 0; const MCOperand MO = MI.getOperand(OpIdx); @@ -1049,12 +1041,12 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, switch (ARM16Expr->getKind()) { default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: - Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movt_hi16 - : ARM::fixup_arm_movt_hi16); + Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movt_hi16 + : ARM::fixup_arm_movt_hi16); break; case ARMMCExpr::VK_ARM_LO16: - Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16 - : ARM::fixup_arm_movw_lo16); + Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movw_lo16 + : ARM::fixup_arm_movw_lo16); break; } @@ -1105,21 +1097,6 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: -getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - // {17-14} Rn - // {13} 1 == imm12, 0 == Rm - // {12} isAdd - // {11-0} imm12/Rm - const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); - uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups, STI); - Binary |= Rn << 14; - return Binary; -} - -uint32_t ARMMCCodeEmitter:: getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -1252,7 +1229,7 @@ getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, return (MO.getImm() >> 2); } -/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. +/// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t ARMMCCodeEmitter:: getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, @@ -1292,6 +1269,46 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. +uint32_t ARMMCCodeEmitter:: +getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + // {12-9} = reg + // {8} = (U)nsigned (add == '1', sub == '0') + // {7-0} = imm8 + unsigned Reg, Imm8; + bool isAdd; + // If The first operand isn't a register, we have a label reference. + const MCOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) { + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. + Imm8 = 0; + isAdd = false; // 'U' bit is handled as part of the fixup. 
+ + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind; + if (isThumb2(STI)) + Kind = MCFixupKind(ARM::fixup_t2_pcrel_9); + else + Kind = MCFixupKind(ARM::fixup_arm_pcrel_9); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + } else { + EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI); + isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add; + } + + uint32_t Binary = ARM_AM::getAM5Offset(Imm8); + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (isAdd) + Binary |= (1 << 8); + Binary |= (Reg << 9); + return Binary; +} + unsigned ARMMCCodeEmitter:: getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, @@ -1446,23 +1463,6 @@ getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, } unsigned ARMMCCodeEmitter:: -getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO1 = MI.getOperand(OpNum); - - // FIXME: Needs fixup support. - unsigned Value = 0; - int32_t tmp = (int32_t)MO1.getImm(); - if (tmp < 0) - tmp = abs(tmp); - else - Value |= 4096; // Set the ADD bit - Value |= tmp & 4095; - return Value; -} - -unsigned ARMMCCodeEmitter:: getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 8c8c249addb5..afb089ab0286 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -16,7 +16,6 @@ #include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -201,18 +200,6 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default) { - // Default relocation model on Darwin is PIC, not DynamicNoPIC. - RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; - } - X->initMCCodeGenInfo(RM, CM, OL); - return X; -} - static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { @@ -291,9 +278,6 @@ extern "C" void LLVMInitializeARMTargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo); - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo); - // Register the MC instruction info. 
TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 4468132588cf..482bcf902518 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -12,7 +12,7 @@ #include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" using namespace llvm; using namespace object; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index cfd504e533af..cfa6ce7da65e 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -389,7 +389,8 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, uint32_t Offset = Target.getConstant(); if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA) Offset += 1 << Log2Size; - if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A)) + if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) && + RelocType != MachO::ARM_RELOC_HALF) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, RelocType, Log2Size, FixedValue); @@ -447,8 +448,10 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. if (Type == MachO::ARM_RELOC_HALF) { - // The other-half value only gets populated for the movt and movw - // relocation entries. + // The entire addend is needed to correctly apply a relocation. One half is + // extracted from the instruction itself, the other comes from this + // PAIR. I.e. it's correct that we insert the high bits of the addend in the + // MOVW case here. relocation entries. uint32_t Value = 0; switch ((unsigned)Fixup.getKind()) { default: break; diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp index ed2deeaa24c0..7f2124033982 100644 --- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -378,12 +378,14 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(*Fn.getFunction())) + return false; + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>(); - // Only run this for CortexA9. 
- if (!STI->isCortexA9()) + if (!STI->expandMLx()) return false; isLikeA9 = STI->isLikeA9() || STI->isSwift(); isSwift = STI->isSwift(); diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index df73554372d8..3f88eb818062 100644 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 93e0ac4aa320..c0732e4b750a 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -38,18 +38,17 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ return !MF.getFrameInfo()->hasVarSizedObjects(); } -static void -emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const ThumbRegisterInfo &MRI, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { +static void emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, const DebugLoc &dl, + const ThumbRegisterInfo &MRI, int NumBytes, + unsigned MIFlags = MachineInstr::NoFlags) { emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, MRI, MIFlags); } -void Thumb1FrameLowering:: +MachineBasicBlock::iterator Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = @@ -60,9 +59,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); + MachineInstr &Old = *I; + DebugLoc dl = Old.getDebugLoc(); + unsigned Amount = Old.getOperand(0).getImm(); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -71,7 +70,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount = (Amount+Align-1)/Align*Align; // Replace the pseudo instruction with a new instruction... 
- unsigned Opc = Old->getOpcode(); + unsigned Opc = Old.getOpcode(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); } else { @@ -80,7 +79,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } } } - MBB.erase(I); + return MBB.erase(I); } void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, @@ -151,7 +150,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R9: case ARM::R10: case ARM::R11: - if (STI.isTargetMachO()) { + if (STI.splitFramePushPop()) { GPRCS2Size += 4; break; } @@ -189,7 +188,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; - if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) { + if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; @@ -213,7 +212,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) + if (STI.splitFramePushPop()) break; // fallthough case ARM::R0: @@ -304,16 +303,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } -static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { - if (MI->getOpcode() == ARM::tLDRspi && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) +static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { + if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() && + isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)) return true; - else if (MI->getOpcode() == ARM::tPOP) { + else if (MI.getOpcode() == ARM::tPOP) { // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. - for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i) - if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs)) return false; return true; } @@ -346,8 +344,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, if (MBBI != MBB.begin()) { do --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); - if (!isCSRestore(MBBI, CSRegs)) + while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs)); + if (!isCSRestore(*MBBI, CSRegs)) ++MBBI; } @@ -376,11 +374,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FramePtr)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { + &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); - if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) + if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); - } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) + } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } @@ -467,7 +465,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. 
 LivePhysRegs UsedRegs(STI.getRegisterInfo());
- UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ UsedRegs.addLiveOuts(MBB);
 // The semantic of pristines changed recently and now,
 // the callee-saved registers that are touched in the function
 // are not part of the pristines set anymore.
@@ -637,7 +635,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 Reg = ARM::PC;
 (*MIB).setDesc(TII.get(ARM::tPOP_RET));
 if (MI != MBB.end())
- MIB.copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(*MI);
 MI = MBB.erase(MI);
 } else
 // LR may only be popped into PC, as part of return sequence.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 27faac63683a..9de1ba1d7009 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -41,7 +41,7 @@ public:
 bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void
+ MachineBasicBlock::iterator
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 MachineBasicBlock::iterator MI) const override;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 530e1d33839a..159731d8fc72 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -38,9 +38,9 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
 }
 void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I, DebugLoc DL,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  bool KillSrc) const {
+                                  MachineBasicBlock::iterator I,
+                                  const DebugLoc &DL, unsigned DestReg,
+                                  unsigned SrcReg, bool KillSrc) const {
 // Need to check the arch.
 MachineFunction &MF = *MBB.getParent();
 const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
@@ -118,11 +118,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 }
 }
-void
-Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
-                                      Reloc::Model RM) const {
-  if (RM == Reloc::PIC_)
-    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM);
+void Thumb1InstrInfo::expandLoadStackGuard(
+    MachineBasicBlock::iterator MI) const {
+  MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetMachine &TM = MF.getTarget();
+  if (TM.isPositionIndependent())
+    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi);
 else
-    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM);
+    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi);
 }
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index f3f493d89237..931914ad2799 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -38,9 +38,8 @@ public:
 ///
 const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
-  void copyPhysReg(MachineBasicBlock &MBB,
-                   MachineBasicBlock::iterator I, DebugLoc DL,
-                   unsigned DestReg, unsigned SrcReg,
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
 bool KillSrc) const override;
 void storeRegToStackSlot(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator MBBI,
@@ -55,8 +54,7 @@ public:
 const TargetRegisterInfo *TRI) const override;
 private:
-  void expandLoadStackGuard(MachineBasicBlock::iterator MI,
-                            Reloc::Model RM) const override;
+  void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
 };
 }
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index bf0498dfda69..0c7055551632 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -36,6 +36,11 @@ namespace {
 bool runOnMachineFunction(MachineFunction &Fn) override;
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::AllVRegsAllocated);
+    }
+
 const char *getPassName() const override {
 return "Thumb IT blocks insertion pass";
 }
@@ -165,7 +170,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
 ++I;
 if (I != E) {
 unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
 if (NCC == CC || NCC == OCC)
 return true;
 }
@@ -182,7 +187,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
 MachineInstr *MI = &*MBBI;
 DebugLoc dl = MI->getDebugLoc();
 unsigned PredReg = 0;
- ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
 if (CC == ARMCC::AL) {
 ++MBBI;
 continue;
 }
@@ -222,7 +227,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
 MI = NMI;
 unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
 if (NCC == CC || NCC == OCC) {
 Mask |= (NCC & 1) << Pos;
 // Add implicit use of ITSTATE.
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 4da769f23280..e2e6dafd218a 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -50,7 +50,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 MachineBasicBlock *NewDest) const {
 MachineBasicBlock *MBB = Tail->getParent();
 ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
- if (!AFI->hasITBlocks()) {
+ if (!AFI->hasITBlocks() || Tail->isBranch()) {
 TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
 return;
 }
@@ -58,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 // If the first instruction of Tail is predicated, we may have to update
 // the IT instruction.
 unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(*Tail, PredReg);
 MachineBasicBlock::iterator MBBI = Tail;
 if (CC != ARMCC::AL)
 // Expecting at least the t2IT instruction before it.
@@ -106,13 +106,13 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
 }
 unsigned PredReg = 0;
- return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+ return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL;
 }
 void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I, DebugLoc DL,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  bool KillSrc) const {
+                                  MachineBasicBlock::iterator I,
+                                  const DebugLoc &DL, unsigned DestReg,
+                                  unsigned SrcReg, bool KillSrc) const {
 // Handle SPR, DPR, and QPR copies.
 if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
 return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
@@ -148,8 +148,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
 // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
 // otherwise).
-    MachineRegisterInfo *MRI = &MF.getRegInfo();
-    MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      MachineRegisterInfo *MRI = &MF.getRegInfo();
+      MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    }
 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
@@ -187,8 +189,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
 // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
 // otherwise).
-    MachineRegisterInfo *MRI = &MF.getRegInfo();
-    MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+      MachineRegisterInfo *MRI = &MF.getRegInfo();
+      MRI->constrainRegClass(DestReg,
+                             &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+    }
 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
@@ -204,20 +209,22 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
-void
-Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
-                                      Reloc::Model RM) const {
-  if (RM == Reloc::PIC_)
-    expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM);
+void Thumb2InstrInfo::expandLoadStackGuard(
+    MachineBasicBlock::iterator MI) const {
+  MachineFunction &MF = *MI->getParent()->getParent();
+  if (MF.getTarget().isPositionIndependent())
+    expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
 else
-    expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM);
+    expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
 }
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator &MBBI, DebugLoc dl,
-                                  unsigned DestReg, unsigned BaseReg, int NumBytes,
-                                  ARMCC::CondCodes Pred, unsigned PredReg,
-                                  const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+                                  MachineBasicBlock::iterator &MBBI,
+                                  const DebugLoc &dl, unsigned DestReg,
+                                  unsigned BaseReg, int NumBytes,
+                                  ARMCC::CondCodes Pred, unsigned PredReg,
+                                  const ARMBaseInstrInfo &TII,
+                                  unsigned MIFlags) {
 if (NumBytes == 0 && DestReg != BaseReg) {
 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
 .addReg(BaseReg, RegState::Kill)
@@ -459,7 +466,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 Offset += MI.getOperand(FrameRegIdx+1).getImm();
 unsigned PredReg;
- if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL) {
 // Turn it into a move.
 MI.setDesc(TII.get(ARM::tMOVr));
 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -627,9 +634,9 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 return Offset == 0;
 }
-ARMCC::CondCodes
-llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
-  unsigned Opc = MI->getOpcode();
+ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
+                                           unsigned &PredReg) {
+  unsigned Opc = MI.getOpcode();
 if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
 return ARMCC::AL;
 return getInstrPredicate(MI, PredReg);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 916ab06ec305..15d63300b6a2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -39,9 +39,8 @@ public:
 bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator MBBI) const override;
-  void copyPhysReg(MachineBasicBlock &MBB,
-                   MachineBasicBlock::iterator I, DebugLoc DL,
-                   unsigned DestReg, unsigned SrcReg,
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
 bool KillSrc) const override;
 void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -63,16 +62,13 @@ public:
 const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
 private:
-  void expandLoadStackGuard(MachineBasicBlock::iterator MI,
-                            Reloc::Model RM) const override;
+  void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
 };
 /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
 /// to llvm::getInstrPredicate except it returns AL for conditional branch
 /// instructions which are "predicated", but are not in IT blocks.
-ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
-
-
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
 }
 #endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index bcd0e5751258..c4fdb9b3147d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -18,11 +18,12 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/IR/Function.h" // To access Function attributes
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include <utility>
 using namespace llvm;
 #define DEBUG_TYPE "t2-reduce-size"
@@ -115,12 +116,14 @@ namespace {
 { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
 { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
 { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
@@ -143,6 +146,11 @@ namespace {
 bool runOnMachineFunction(MachineFunction &MF) override;
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::AllVRegsAllocated);
+    }
+
 const char *getPassName() const override {
 return "Thumb2 instruction size reduction pass";
 }
@@ -208,7 +216,7 @@ namespace {
 }
 Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
-    : MachineFunctionPass(ID), PredicateFtor(Ftor) {
+    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
 OptimizeSize = MinimizeSize = false;
 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
 unsigned FromOpc = ReduceTable[i].WideOpc;
@@ -417,6 +425,46 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
 HasShift = true;
 OpNum = 4;
 break;
+  case ARM::t2LDR_POST:
+  case ARM::t2STR_POST: {
+    if (!MBB.getParent()->getFunction()->optForMinSize())
+      return false;
+
+    // We're creating a completely different type of load/store - LDM from LDR.
+    // For this reason we can't reuse the logic at the end of this function; we
+    // have to implement the MI building here.
+    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
+    unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
+    unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
+    unsigned Offset = MI->getOperand(3).getImm();
+    unsigned PredImm = MI->getOperand(4).getImm();
+    unsigned PredReg = MI->getOperand(5).getReg();
+    assert(isARMLowRegister(Rt));
+    assert(isARMLowRegister(Rn));
+
+    if (Offset != 4)
+      return false;
+
+    // Add the 16-bit load / store instruction.
+    DebugLoc dl = MI->getDebugLoc();
+    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
+                   .addReg(Rn, RegState::Define)
+                   .addReg(Rn)
+                   .addImm(PredImm)
+                   .addReg(PredReg)
+                   .addReg(Rt, IsStore ? 0 : RegState::Define);
+
+    // Transfer memoperands.
+    MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+    // Transfer MI flags.
+    MIB.setMIFlags(MI->getFlags());
+
+    // Kill the old instruction.
+    MI->eraseFromBundle();
+    ++NumLdSts;
+    return true;
+  }
 case ARM::t2LDMIA: {
 unsigned BaseReg = MI->getOperand(0).getReg();
 assert(isARMLowRegister(BaseReg));
@@ -597,7 +645,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
 case ARM::t2ADDSri:
 case ARM::t2ADDSrr: {
 unsigned PredReg = 0;
- if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
 switch (Opc) {
 default: break;
 case ARM::t2ADDSri: {
@@ -670,7 +718,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 if (Reg1 != Reg0)
 return false;
 // Try to commute the operands to make it a 2-address instruction.
- MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
 if (!CommutedMI)
 return false;
 }
@@ -678,11 +726,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 // Try to commute the operands to make it a 2-address instruction.
 unsigned CommOpIdx1 = 1;
 unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
-    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
 MI->getOperand(CommOpIdx2).getReg() != Reg0)
 return false;
 MachineInstr *CommutedMI =
-        TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
+        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
 if (!CommutedMI)
 return false;
 }
@@ -702,7 +750,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 // Check if it's possible / necessary to transfer the predicate.
 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
 unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
 bool SkipPred = false;
 if (Pred != ARMCC::AL) {
 if (!NewMCID.isPredicable())
@@ -798,7 +846,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 // Check if it's possible / necessary to transfer the predicate.
 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
 unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
 bool SkipPred = false;
 if (Pred != ARMCC::AL) {
 if (!NewMCID.isPredicable())
@@ -983,7 +1031,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 NextMII->bundleWithPred();
 }
-    if (!NextInSameBundle && MI->isInsideBundle()) {
+    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
 // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
 // marker is only on the BUNDLE instruction. Process the BUNDLE
 // instruction as we finish with the bundled instruction to work around
@@ -1050,5 +1098,5 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 /// reduction pass.
 FunctionPass *llvm::createThumb2SizeReductionPass(
 std::function<bool(const Function &)> Ftor) {
-  return new Thumb2SizeReduce(Ftor);
+  return new Thumb2SizeReduce(std::move(Ftor));
 }
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index b5f9d7e38f27..6c26c8843865 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -61,7 +61,7 @@ ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
 static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator &MBBI,
-                                    DebugLoc dl, unsigned DestReg,
+                                    const DebugLoc &dl, unsigned DestReg,
 unsigned SubIdx, int Val,
 ARMCC::CondCodes Pred, unsigned PredReg,
 unsigned MIFlags) {
@@ -81,7 +81,7 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
 static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator &MBBI,
-                                    DebugLoc dl, unsigned DestReg,
+                                    const DebugLoc &dl, unsigned DestReg,
 unsigned SubIdx, int Val,
 ARMCC::CondCodes Pred, unsigned PredReg,
 unsigned MIFlags) {
@@ -101,9 +101,9 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
 /// emitLoadConstPool - Emits a load from constpool to materialize the
 /// specified immediate.
 void ThumbRegisterInfo::emitLoadConstPool(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
-    unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
-    unsigned PredReg, unsigned MIFlags) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+    const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val,
+    ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const {
 MachineFunction &MF = *MBB.getParent();
 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
 if (STI.isThumb1Only()) {
@@ -120,57 +120,55 @@ void ThumbRegisterInfo::emitLoadConstPool(
 /// a destreg = basereg + immediate in Thumb code. Materialize the immediate
 /// in a register using mov / mvn sequences or load the immediate from a
 /// constpool entry.
-static
-void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator &MBBI,
-                              DebugLoc dl,
-                              unsigned DestReg, unsigned BaseReg,
-                              int NumBytes, bool CanChangeCC,
-                              const TargetInstrInfo &TII,
-                              const ARMBaseRegisterInfo& MRI,
-                              unsigned MIFlags = MachineInstr::NoFlags) {
-  MachineFunction &MF = *MBB.getParent();
-  bool isHigh = !isARMLowRegister(DestReg) ||
-                (BaseReg != 0 && !isARMLowRegister(BaseReg));
-  bool isSub = false;
-  // Subtract doesn't have high register version. Load the negative value
-  // if either base or dest register is a high register. Also, if do not
-  // issue sub as part of the sequence if condition register is to be
-  // preserved.
-  if (NumBytes < 0 && !isHigh && CanChangeCC) {
-    isSub = true;
-    NumBytes = -NumBytes;
-  }
-  unsigned LdReg = DestReg;
-  if (DestReg == ARM::SP)
-    assert(BaseReg == ARM::SP && "Unexpected!");
-  if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
-    LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
-
-  if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
-    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
-      .addImm(NumBytes).setMIFlags(MIFlags);
-  } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
-    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
-      .addImm(NumBytes).setMIFlags(MIFlags);
-    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
-      .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
-  } else
-    MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
-                          ARMCC::AL, 0, MIFlags);
-
-  // Emit add / sub.
-  int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
-                                                              : ARM::tADDrr);
-  MachineInstrBuilder MIB =
-    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
-  if (Opc != ARM::tADDhirr)
-    MIB = AddDefaultT1CC(MIB);
-  if (DestReg == ARM::SP || isSub)
-    MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
-  else
-    MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
-  AddDefaultPred(MIB);
+static void emitThumbRegPlusImmInReg(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+    const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes,
+    bool CanChangeCC, const TargetInstrInfo &TII,
+    const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
+  MachineFunction &MF = *MBB.getParent();
+  bool isHigh = !isARMLowRegister(DestReg) ||
+                (BaseReg != 0 && !isARMLowRegister(BaseReg));
+  bool isSub = false;
+  // Subtract doesn't have high register version. Load the negative value
+  // if either base or dest register is a high register. Also, if do not
+  // issue sub as part of the sequence if condition register is to be
+  // preserved.
+  if (NumBytes < 0 && !isHigh && CanChangeCC) {
+    isSub = true;
+    NumBytes = -NumBytes;
+  }
+  unsigned LdReg = DestReg;
+  if (DestReg == ARM::SP)
+    assert(BaseReg == ARM::SP && "Unexpected!");
+  if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
+    LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+
+  if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
+    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+        .addImm(NumBytes)
+        .setMIFlags(MIFlags);
+  } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
+    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+        .addImm(NumBytes)
+        .setMIFlags(MIFlags);
+    AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+        .addReg(LdReg, RegState::Kill)
+        .setMIFlags(MIFlags);
+  } else
+    MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
+                          MIFlags);
+
+  // Emit add / sub.
+  int Opc = (isSub) ? ARM::tSUBrr
+                    : ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr);
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+  if (Opc != ARM::tADDhirr)
+    MIB = AddDefaultT1CC(MIB);
+  if (DestReg == ARM::SP || isSub)
+    MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+  else
+    MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+  AddDefaultPred(MIB);
 }
 /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
@@ -179,10 +177,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
 /// be too long. This is allowed to modify the condition flags.
 void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator &MBBI,
-                                     DebugLoc dl,
-                                     unsigned DestReg, unsigned BaseReg,
-                                     int NumBytes, const TargetInstrInfo &TII,
-                                     const ARMBaseRegisterInfo& MRI,
+                                     const DebugLoc &dl, unsigned DestReg,
+                                     unsigned BaseReg, int NumBytes,
+                                     const TargetInstrInfo &TII,
+                                     const ARMBaseRegisterInfo &MRI,
 unsigned MIFlags) {
 bool isSub = NumBytes < 0;
 unsigned Bytes = (unsigned)NumBytes;
@@ -281,7 +279,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
 unsigned RequiredExtraInstrs;
 if (ExtraRange)
-    RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
+    RequiredExtraInstrs = alignTo(RangeAfterCopy, ExtraRange) / ExtraRange;
 else if (RangeAfterCopy > 0)
 // We need an extra instruction but none is available
 RequiredExtraInstrs = 1000000;
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index 23aaff37f409..e6b06959e428 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -39,8 +39,9 @@ public:
 /// specified immediate.
 void emitLoadConstPool(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator &MBBI,
-                         DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
-                         ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
+                         const DebugLoc &dl, unsigned DestReg, unsigned SubIdx,
+                         int Val, ARMCC::CondCodes Pred = ARMCC::AL,
+                         unsigned PredReg = 0,
 unsigned MIFlags = MachineInstr::NoFlags) const override;
 // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be