Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
21 files changed, 426 insertions, 364 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 57f9d1c6b610..005b74a68af3 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", [FeatureFPARMv8]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict FP to 16 double registers">; -def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instructions">; +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", + "true", + "Enable divide instructions in Thumb">; def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", "HasHardwareDivideInARM", "true", "Enable divide instructions in ARM mode">; @@ -225,7 +226,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureVirtualization : SubtargetFeature<"virtualization", "HasVirtualization", "true", "Supports Virtualization extension", - [FeatureHWDiv, FeatureHWDivARM]>; + [FeatureHWDivThumb, FeatureHWDivARM]>; // M-series ISA def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", @@ -433,21 +434,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, FeatureDB, FeatureDSP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureRClass]>; def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass]>; def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass, FeatureDSP]>; @@ -502,7 +503,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", [HasV8MBaselineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureV7Clrex, Feature8MSecExt, FeatureAcquireRelease, @@ -512,7 +513,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", [HasV8MMainlineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, Feature8MSecExt, FeatureAcquireRelease, FeatureMClass]>; @@ -678,7 +679,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureFP16, FeatureAvoidPartialCPSR, FeatureVFP4, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM]>; def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, @@ -686,7 +687,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -768,39 +769,39 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline, FeatureVFPOnlySP]>; def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; @@ -811,7 +812,7 @@ def 
: ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -820,25 +821,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureZCZeroing]>; def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index eb0d410b596b..14e197f477f1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { ATS.finishAttributeSection(); } -static bool isV8M(const ARMSubtarget *Subtarget) { - // Note that v8M Baseline is a subset of v6T2! - return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) || - Subtarget->hasV8MMainlineOps(); -} - //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) { // to appear in the .ARM.attributes section in ELF. // Instead of subclassing the MCELFStreamer, we do the work here. -static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, - const ARMSubtarget *Subtarget) { - if (CPU == "xscale") - return ARMBuildAttrs::v5TEJ; - - if (Subtarget->hasV8Ops()) { - if (Subtarget->isRClass()) - return ARMBuildAttrs::v8_R; - return ARMBuildAttrs::v8_A; - } else if (Subtarget->hasV8MMainlineOps()) - return ARMBuildAttrs::v8_M_Main; - else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasDSP()) - return ARMBuildAttrs::v7E_M; - return ARMBuildAttrs::v7; - } else if (Subtarget->hasV6T2Ops()) - return ARMBuildAttrs::v6T2; - else if (Subtarget->hasV8MBaselineOps()) - return ARMBuildAttrs::v8_M_Base; - else if (Subtarget->hasV6MOps()) - return ARMBuildAttrs::v6S_M; - else if (Subtarget->hasV6Ops()) - return ARMBuildAttrs::v6; - else if (Subtarget->hasV5TEOps()) - return ARMBuildAttrs::v5TE; - else if (Subtarget->hasV5TOps()) - return ARMBuildAttrs::v5T; - else if (Subtarget->hasV4TOps()) - return ARMBuildAttrs::v4T; - else - return ARMBuildAttrs::v4; -} - // Returns true if all functions have the same function attribute value. // It also returns true when the module has no functions. 
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr, @@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() { static_cast<const ARMBaseTargetMachine &>(TM); const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); - const std::string &CPUString = STI.getCPUString(); - - if (!StringRef(CPUString).startswith("generic")) { - // FIXME: remove krait check when GNU tools support krait cpu - if (STI.isKrait()) { - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); - // We consider krait as a "cortex-a9" + hwdiv CPU - // Enable hwdiv through ".arch_extension idiv" - if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); - } else - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); - } - - ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI)); - - // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". - if (STI.hasV7Ops() || isV8M(&STI)) { - if (STI.isAClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - } else if (STI.isRClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::RealTimeProfile); - } else if (STI.isMClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::MicroControllerProfile); - } - } - - ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, - STI.hasARMOps() ? ARMBuildAttrs::Allowed - : ARMBuildAttrs::Not_Allowed); - if (isV8M(&STI)) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumbDerived); - } else if (STI.isThumb1Only()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (STI.hasThumb2()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } - - if (STI.hasNEON()) { - /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (STI.hasFPARMv8()) { - if (STI.hasCrypto()) - ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); - else - ATS.emitFPU(ARM::FK_NEON_FP_ARMV8); - } else if (STI.hasVFP4()) - ATS.emitFPU(ARM::FK_NEON_VFPV4); - else - ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); - // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture - if (STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); - } else { - if (STI.hasFPARMv8()) - // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one - // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) - : ARM::FK_FP_ARMV8); - else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) - : ARM::FK_VFPV4); - else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() - // +d16 - ? (STI.isFPOnlySP() - ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) - : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) - // -d16 - : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); - else if (STI.hasVFP2()) - ATS.emitFPU(ARM::FK_VFPV2); - } + // Emit build attributes for the available hardware. + ATS.emitTargetAttributes(STI); // RW data addressing. 
if (isPositionIndependent()) { @@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEEE754); - if (STI.allowsUnalignedMem()) - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Allowed); - else - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Not_Allowed); - // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); - // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.isFPOnlySP()) - ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, - ARMBuildAttrs::HardFPSinglePrecision); - // Hard float. Use both S and D registers and conform to AAPCS-VFP. if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); - // FIXME: Should we signal R9 usage? - - if (STI.hasFP16()) - ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); - // FIXME: To support emitting this build attribute as GCC does, the // -mfp16-format option and associated plumbing must be // supported. For now the __fp16 type is exposed by default, so this @@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, ARMBuildAttrs::FP16FormatIEEE); - if (STI.hasMPExtension()) - ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); - - // Hardware divide in ARM mode is part of base arch, starting from ARMv8. - // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). - // It is not possible to produce DisallowDIV: if hwdiv is present in the base - // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. - // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; - // otherwise, the default value (AllowDIVIfExists) applies. - if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); - - if (STI.hasDSP() && isV8M(&STI)) - ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); - if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width @@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() { else ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR); - - if (STI.hasTrustZone() && STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZVirtualization); - else if (STI.hasTrustZone()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZ); - else if (STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowVirtualization); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 23777b821f9f..faf1c631a3a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -404,6 +404,29 @@ public: /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. bool isSwiftFastImmShift(const MachineInstr *MI) const; + + /// Returns predicate register associated with the given frame instruction. 
+ unsigned getFramePred(const MachineInstr &MI) const { + assert(isFrameInstr(MI)); + if (isFrameSetup(MI)) + // Operands of ADJCALLSTACKDOWN: + // - argument declared in ADJCALLSTACKDOWN pattern: + // 0 - frame size + // 1 - predicate code (like ARMCC::AL) + // - added by predOps: + // 2 - predicate reg + return MI.getOperand(2).getReg(); + assert(MI.getOpcode() == ARM::ADJCALLSTACKUP || + MI.getOpcode() == ARM::tADJCALLSTACKUP); + // Operands of ADJCALLSTACKUP: + // - argument declared in ADJCALLSTACKUP pattern: + // 0 - frame size + // 1 - arg of CALLSEQ_END + // 2 - predicate code + // - added by predOps: + // 3 - predicate reg + return MI.getOperand(3).getReg(); + } }; /// Get the operands corresponding to the given \p Pred value. By default, the diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 7a7b7fede7c8..bc7afdb7f1c9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; -def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, - (sequence "R%u", 12, 1), - (sequence "D%u", 31, 0))>; +def CSR_iOS_TLSCall + : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), + (sequence "D%u", 31, 0))>; // C++ TLS access function saves all registers except SP. Try to match // the order of CSRs in CSR_iOS. diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 23722f1b7f3f..6434df317aa8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() { .add(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; - } - if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { // Just remove the push. MI->eraseFromParent(); MadeChange = true; @@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // If we're in PIC mode, there should be another ADD following. auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 01e062bd185c..e9bc7db66fa4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { // If we have integer div support we should have selected this automagically. // In case we have a real miss go ahead and return false and we'll pick // it up later. - if (Subtarget->hasDivide()) return false; + if (Subtarget->hasDivideInThumbMode()) + return false; // Otherwise emit a libcall. 
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 37be22bed540..70dbe1bc5b95 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, } } +/// We need the offset of the frame pointer relative to other MachineFrameInfo +/// offsets which are encoded relative to SP at function begin. +/// See also emitPrologue() for how the FP is set up. +/// Unfortunately we cannot determine this value in determineCalleeSaves() yet +/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use +/// this to produce a conservative estimate that we check in an assert() later. +static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { + // This is a conservative estimation: Assume the frame pointer being r7 and + // pc("r15") up to r8 getting spilled before (= 8 registers). + return -AFI.getArgRegsSaveSize() - (8 * 4); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = - MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; + int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); + assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + "Max FP estimation is wrong"); + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -1700,6 +1714,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // worth the effort and added fragility? unsigned EstimatedStackSize = MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + + // Determine biggest (positive) SP offset in MachineFrameInfo. + int MaxFixedOffset = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I); + MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset); + } + bool HasFP = hasFP(MF); if (HasFP) { if (AFI->hasStackFrame()) @@ -1707,15 +1729,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else { // If FP is not used, SP will be used to access arguments, so count the // size of arguments into the estimation. - EstimatedStackSize += AFI->getArgumentStackSize(); + EstimatedStackSize += MaxFixedOffset; } EstimatedStackSize += 16; // For possible paddings. - bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || - MFI.hasVarSizedObjects() || - (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)); + unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || + // For large argument stacks fp relative addressed may overflow. 
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + if (BigFrameOffsets || + !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); if (HasFP) { @@ -1899,7 +1926,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -1958,7 +1985,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -1976,14 +2003,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); + unsigned PredReg = TII.getFramePred(Old); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old.getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old.getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index b07b4e1f5cfb..e9df9449103c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -228,11 +228,6 @@ private: const uint16_t *DOpcodes, const uint16_t *QOpcodes = nullptr); - /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, - /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be - /// generated to force the table registers to be consecutive. - void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); - /// Try to select SBFX/UBFX instructions for ARM. bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -1859,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. 
This means the "[rN]!" form can +/// be used. +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast<ConstantSDNode>(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1926,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa<ConstantSDNode>(Inc.getNode())) - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2080,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2214,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2318,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) @@ -2356,39 +2364,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, - unsigned Opc) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - SDLoc dl(N); - EVT VT = N->getValueType(0); - unsigned FirstTblReg = IsExt ? 2 : 1; - - // Form a REG_SEQUENCE to force register allocation. 
- SDValue RegSeq; - SDValue V0 = N->getOperand(FirstTblReg + 0); - SDValue V1 = N->getOperand(FirstTblReg + 1); - if (NumVecs == 2) - RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - else { - SDValue V2 = N->getOperand(FirstTblReg + 2); - // If it's a vtbl3, form a quad D-register and leave the last part as - // an undef. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) - : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); - } - - SmallVector<SDValue, 6> Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); - Ops.push_back(getAL(CurDAG, dl)); // predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); -} - bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -3730,59 +3705,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - - case Intrinsic::arm_neon_vtbl2: - SelectVTBL(N, false, 2, ARM::VTBL2); - return; - case Intrinsic::arm_neon_vtbl3: - SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); - return; - case Intrinsic::arm_neon_vtbl4: - SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); - return; - - case Intrinsic::arm_neon_vtbx2: - SelectVTBL(N, true, 2, ARM::VTBX2); - return; - case Intrinsic::arm_neon_vtbx3: - SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); - return; - case Intrinsic::arm_neon_vtbx4: - SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); - return; - } - break; - } - - case ARMISD::VTBL1: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Ops[] = {N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); - return; - } - case ARMISD::VTBL2: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - - SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); - return; - } - case ISD::ATOMIC_CMP_SWAP: SelectCMP_SWAP(N); return; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index e697c8ca5339..165e9b7378c7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -852,7 +852,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. 
@@ -860,7 +860,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { + if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); @@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; @@ -3347,6 +3347,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vtbl1: + return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::arm_neon_vtbl2: + return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } @@ -10867,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode *N, // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { - uint64_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - } else if (NumBytes >= 3 * 16) { + ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode()); + if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; @@ -11688,34 +11691,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, - APInt &KnownOne) { - if (Op.getOpcode() == ARMISD::BFI) { - // Conservatively, we can recurse down the first operand - // and just mask out all affected bits. - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - - // The operand to BFI is already a mask suitable for removing the bits it - // sets. - ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); - KnownZero &= Mask; - KnownOne &= Mask; - return; - } - if (Op.getOpcode() == ARMISD::CMOV) { - APInt KZ2(KnownZero.getBitWidth(), 0); - APInt KO2(KnownOne.getBitWidth(), 0); - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2); - - KnownZero &= KZ2; - KnownOne &= KO2; - return; - } - return DAG.computeKnownBits(Op, KnownZero, KnownOne); -} - SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11777,7 +11752,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D // Lastly, can we determine that the bits defined by OrCI // are zero in Y? 
APInt KnownZero, KnownOne; - computeKnownBits(DAG, Y, KnownZero, KnownOne); + DAG.computeKnownBits(Y, KnownZero, KnownOne); if ((OrCI & KnownZero) != OrCI) return SDValue(); @@ -12657,6 +12632,19 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } } } + case ARMISD::BFI: { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); + const APInt &Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } } } @@ -13052,7 +13040,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. - if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() + : Subtarget->hasDivideInARMMode(); + if (hasDivide && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 70a0b1380ec9..8b54ce430ed2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -717,7 +717,7 @@ class InstrItineraryData; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index cc0e7d4d9c35..703e8071b177 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -259,8 +259,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16","full half-float">; -def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; +def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, + AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; def HasDSP : Predicate<"Subtarget->hasDSP()">, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 681e235d78f0..9b08c612e16b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; +def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>]>; +def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; +def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; +def NEONvtbl2 : 
SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; + + def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); unsigned EltBits = 0; @@ -6443,7 +6451,8 @@ def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, "vtbl", "8", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), @@ -6498,6 +6507,49 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), + (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vm)), + (v8i8 (VTBX2 v8i8:$orig, + (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBX3Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBX4Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; + // VRINT : Vector Rounding multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index f5b673b78ad7..f710ee6a7e77 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2797,7 +2797,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -2809,7 +2809,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 8d224d6a70fa..816596b85721 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ 
b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -299,6 +299,20 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; + case G_SUB: + I.setDesc(TII.get(ARM::SUBrr)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + case G_MUL: + if (TII.getSubtarget().hasV6Ops()) { + I.setDesc(TII.get(ARM::MUL)); + } else { + assert(TII.getSubtarget().useMulOps() && "Unsupported target"); + I.setDesc(TII.get(ARM::MULv5)); + MIB->getOperand(0).setIsEarlyClobber(true); + } + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; case G_FADD: if (!selectFAdd(MIB, TII, MRI)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 994bbd673dd8..fe9681439e6b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -43,8 +43,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (auto Ty : {s1, s8, s16, s32}) - setAction({G_ADD, Ty}, Legal); + for (unsigned Op : {G_ADD, G_SUB, G_MUL}) + for (auto Ty : {s1, s8, s16, s32}) + setAction({Op, Ty}, Legal); for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 08f3da738868..e47bd3a8963e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -219,6 +219,8 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { switch (Opc) { case G_ADD: + case G_SUB: + case G_MUL: case G_SEXT: case G_ZEXT: case G_GEP: diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 40993fc0aa8a..d2630685d91b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -208,8 +208,8 @@ protected: /// FP registers for VFPv3. bool HasD16 = false; - /// HasHardwareDivide - True if subtarget supports [su]div - bool HasHardwareDivide = false; + /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode + bool HasHardwareDivideInThumb = false; /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode bool HasHardwareDivideInARM = false; @@ -507,7 +507,7 @@ public: return hasNEON() && UseNEONForSinglePrecisionFP; } - bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasDataBarrier() const { return HasDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f421d3ac1693..ada816c16389 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode( clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb", "Warn in ARM, emit implicit ITs in Thumb"))); +static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes", + cl::init(false)); + class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; @@ -540,6 +543,10 @@ public: // Initialize the set of available features. 
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + // Add build attributes based on the selected target. + if (AddBuildAttributes) + getTargetStreamer().emitTargetAttributes(STI); + // Not in an ITBlock to start with. ITState.CurPosition = ~0U; @@ -10189,8 +10196,8 @@ static const struct { { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, - {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, + { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, + {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6fa890ba1cd5..4d6c52f3cd49 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -464,7 +464,7 @@ public: void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes); void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { - LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo); + LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo); MCELFStreamer::ChangeSection(Section, Subsection); auto LastMappingSymbol = LastMappingSymbols.find(Section); if (LastMappingSymbol != LastMappingSymbols.end()) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 73e563890dd9..2b0cd461df7a 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,9 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetMachine.h" #include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; @@ -75,3 +79,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} + +static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { + if (STI.getCPU() == "xscale") + return ARMBuildAttrs::v5TEJ; + + if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::FeatureRClass)) + return ARMBuildAttrs::v8_R; + return ARMBuildAttrs::v8_A; + } else if (STI.hasFeature(ARM::HasV8MMainlineOps)) + return ARMBuildAttrs::v8_M_Main; + else if (STI.hasFeature(ARM::HasV7Ops)) { + if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP)) + return ARMBuildAttrs::v7E_M; + return ARMBuildAttrs::v7; + } else if (STI.hasFeature(ARM::HasV6T2Ops)) + return ARMBuildAttrs::v6T2; + else if (STI.hasFeature(ARM::HasV8MBaselineOps)) + return ARMBuildAttrs::v8_M_Base; + else if (STI.hasFeature(ARM::HasV6MOps)) + return ARMBuildAttrs::v6S_M; + else if (STI.hasFeature(ARM::HasV6Ops)) + return ARMBuildAttrs::v6; + else if 
(STI.hasFeature(ARM::HasV5TEOps)) + return ARMBuildAttrs::v5TE; + else if (STI.hasFeature(ARM::HasV5TOps)) + return ARMBuildAttrs::v5T; + else if (STI.hasFeature(ARM::HasV4TOps)) + return ARMBuildAttrs::v4T; + else + return ARMBuildAttrs::v4; +} + +static bool isV8M(const MCSubtargetInfo &STI) { + // Note that v8M Baseline is a subset of v6T2! + return (STI.hasFeature(ARM::HasV8MBaselineOps) && + !STI.hasFeature(ARM::HasV6T2Ops)) || + STI.hasFeature(ARM::HasV8MMainlineOps); +} + +/// Emit the build attributes that only depend on the hardware that we expect +// /to be available, and not on the ABI, or any source-language choices. +void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { + switchVendor("aeabi"); + + const StringRef CPUString = STI.getCPU(); + if (!CPUString.empty() && !CPUString.startswith("generic")) { + // FIXME: remove krait check when GNU tools support krait cpu + if (STI.hasFeature(ARM::ProcKrait)) { + emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); + // We consider krait as a "cortex-a9" + hwdiv CPU + // Enable hwdiv through ".arch_extension idiv" + if (STI.hasFeature(ARM::FeatureHWDivThumb) || + STI.hasFeature(ARM::FeatureHWDivARM)) + emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM); + } else { + emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + } + } + + emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI)); + + if (STI.hasFeature(ARM::FeatureAClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (STI.hasFeature(ARM::FeatureRClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (STI.hasFeature(ARM::FeatureMClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); + } + + emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM) + ? ARMBuildAttrs::Not_Allowed + : ARMBuildAttrs::Allowed); + + if (isV8M(STI)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumbDerived); + } else if (STI.hasFeature(ARM::FeatureThumb2)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (STI.hasFeature(ARM::HasV4TOps)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); + } + + if (STI.hasFeature(ARM::FeatureNEON)) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (STI.hasFeature(ARM::FeatureFPARMv8)) { + if (STI.hasFeature(ARM::FeatureCrypto)) + emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); + else + emitFPU(ARM::FK_NEON_FP_ARMV8); + } else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(ARM::FK_NEON_VFPV4); + else + emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16 + : ARM::FK_NEON); + // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture + if (STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + STI.hasFeature(ARM::HasV8_1aOps) + ? ARMBuildAttrs::AllowNeonARMv8_1a + : ARMBuildAttrs::AllowNeonARMv8); + } else { + if (STI.hasFeature(ARM::FeatureFPARMv8)) + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16 + : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); + else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? 
(STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16 + : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); + else if (STI.hasFeature(ARM::FeatureVFP3)) + emitFPU( + STI.hasFeature(ARM::FeatureD16) + // +d16 + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 + : ARM::FK_VFPV3XD) + : (STI.hasFeature(ARM::FeatureFP16) + ? ARM::FK_VFPV3_D16_FP16 + : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 + : ARM::FK_VFPV3)); + else if (STI.hasFeature(ARM::FeatureVFP2)) + emitFPU(ARM::FK_VFPV2); + } + + // ABI_HardFP_use attribute to indicate single precision FP. + if (STI.hasFeature(ARM::FeatureVFPOnlySP)) + emitAttribute(ARMBuildAttrs::ABI_HardFP_use, + ARMBuildAttrs::HardFPSinglePrecision); + + if (STI.hasFeature(ARM::FeatureFP16)) + emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + if (STI.hasFeature(ARM::FeatureMP)) + emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + + // Hardware divide in ARM mode is part of base arch, starting from ARMv8. + // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). + // It is not possible to produce DisallowDIV: if hwdiv is present in the base + // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. + // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; + // otherwise, the default value (AllowDIVIfExists) applies. + if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + + if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI)) + emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureStrictAlign)) + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + else + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureTrustZone) && + STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (STI.hasFeature(ARM::FeatureTrustZone)) + emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ); + else if (STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); +} diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index fc083b98395b..d0fd366ab9ed 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; + Amount = alignTo(Amount, getStackAlignment()); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old.getOpcode(); |