aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td43
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp166
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp50
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp128
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp180
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp5
21 files changed, 426 insertions, 364 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 57f9d1c6b610..005b74a68af3 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
[FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict FP to 16 double registers">;
-def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
- "Enable divide instructions">;
+def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb",
+ "true",
+ "Enable divide instructions in Thumb">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
"HasHardwareDivideInARM", "true",
"Enable divide instructions in ARM mode">;
@@ -225,7 +226,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureVirtualization : SubtargetFeature<"virtualization",
"HasVirtualization", "true",
"Supports Virtualization extension",
- [FeatureHWDiv, FeatureHWDivARM]>;
+ [FeatureHWDivThumb, FeatureHWDivARM]>;
// M-series ISA
def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
@@ -433,21 +434,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass]>;
def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass,
FeatureDSP]>;
@@ -502,7 +503,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureV7Clrex,
Feature8MSecExt,
FeatureAcquireRelease,
@@ -512,7 +513,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
[HasV8MMainlineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
Feature8MSecExt,
FeatureAcquireRelease,
FeatureMClass]>;
@@ -678,7 +679,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM]>;
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
@@ -686,7 +687,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -768,39 +769,39 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
FeatureVFPOnlySP]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
@@ -811,7 +812,7 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -820,25 +821,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index eb0d410b596b..14e197f477f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
ATS.finishAttributeSection();
}
-static bool isV8M(const ARMSubtarget *Subtarget) {
- // Note that v8M Baseline is a subset of v6T2!
- return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) ||
- Subtarget->hasV8MMainlineOps();
-}
-
//===----------------------------------------------------------------------===//
// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
// FIXME:
@@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
- const ARMSubtarget *Subtarget) {
- if (CPU == "xscale")
- return ARMBuildAttrs::v5TEJ;
-
- if (Subtarget->hasV8Ops()) {
- if (Subtarget->isRClass())
- return ARMBuildAttrs::v8_R;
- return ARMBuildAttrs::v8_A;
- } else if (Subtarget->hasV8MMainlineOps())
- return ARMBuildAttrs::v8_M_Main;
- else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasDSP())
- return ARMBuildAttrs::v7E_M;
- return ARMBuildAttrs::v7;
- } else if (Subtarget->hasV6T2Ops())
- return ARMBuildAttrs::v6T2;
- else if (Subtarget->hasV8MBaselineOps())
- return ARMBuildAttrs::v8_M_Base;
- else if (Subtarget->hasV6MOps())
- return ARMBuildAttrs::v6S_M;
- else if (Subtarget->hasV6Ops())
- return ARMBuildAttrs::v6;
- else if (Subtarget->hasV5TEOps())
- return ARMBuildAttrs::v5TE;
- else if (Subtarget->hasV5TOps())
- return ARMBuildAttrs::v5T;
- else if (Subtarget->hasV4TOps())
- return ARMBuildAttrs::v4T;
- else
- return ARMBuildAttrs::v4;
-}
-
// Returns true if all functions have the same function attribute value.
// It also returns true when the module has no functions.
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
@@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() {
static_cast<const ARMBaseTargetMachine &>(TM);
const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
- const std::string &CPUString = STI.getCPUString();
-
- if (!StringRef(CPUString).startswith("generic")) {
- // FIXME: remove krait check when GNU tools support krait cpu
- if (STI.isKrait()) {
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
- // We consider krait as a "cortex-a9" + hwdiv CPU
- // Enable hwdiv through ".arch_extension idiv"
- if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
- } else
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
- }
-
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
-
- // Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (STI.hasV7Ops() || isV8M(&STI)) {
- if (STI.isAClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- } else if (STI.isRClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::RealTimeProfile);
- } else if (STI.isMClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::MicroControllerProfile);
- }
- }
-
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
- STI.hasARMOps() ? ARMBuildAttrs::Allowed
- : ARMBuildAttrs::Not_Allowed);
- if (isV8M(&STI)) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumbDerived);
- } else if (STI.isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
- } else if (STI.hasThumb2()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- }
-
- if (STI.hasNEON()) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (STI.hasFPARMv8()) {
- if (STI.hasCrypto())
- ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
- else
- ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
- } else if (STI.hasVFP4())
- ATS.emitFPU(ARM::FK_NEON_VFPV4);
- else
- ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
- // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
- } else {
- if (STI.hasFPARMv8())
- // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
- // FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasVFP4())
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasVFP3())
- ATS.emitFPU(STI.hasD16()
- // +d16
- ? (STI.isFPOnlySP()
- ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
- : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
- else if (STI.hasVFP2())
- ATS.emitFPU(ARM::FK_VFPV2);
- }
+ // Emit build attributes for the available hardware.
+ ATS.emitTargetAttributes(STI);
// RW data addressing.
if (isPositionIndependent()) {
@@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEEE754);
- if (STI.allowsUnalignedMem())
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Allowed);
- else
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Not_Allowed);
-
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
- // ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.isFPOnlySP())
- ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
- ARMBuildAttrs::HardFPSinglePrecision);
-
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
- // FIXME: Should we signal R9 usage?
-
- if (STI.hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
-
// FIXME: To support emitting this build attribute as GCC does, the
// -mfp16-format option and associated plumbing must be
// supported. For now the __fp16 type is exposed by default, so this
@@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (STI.hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
-
- // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
- // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
- // It is not possible to produce DisallowDIV: if hwdiv is present in the base
- // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
- // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
- // otherwise, the default value (AllowDIVIfExists) applies.
- if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
-
- if (STI.hasDSP() && isV8M(&STI))
- ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
-
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
@@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() {
else
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
ARMBuildAttrs::R9IsGPR);
-
- if (STI.hasTrustZone() && STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (STI.hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 23777b821f9f..faf1c631a3a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -404,6 +404,29 @@ public:
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isSwiftFastImmShift(const MachineInstr *MI) const;
+
+ /// Returns predicate register associated with the given frame instruction.
+ unsigned getFramePred(const MachineInstr &MI) const {
+ assert(isFrameInstr(MI));
+ if (isFrameSetup(MI))
+ // Operands of ADJCALLSTACKDOWN:
+ // - argument declared in ADJCALLSTACKDOWN pattern:
+ // 0 - frame size
+ // 1 - predicate code (like ARMCC::AL)
+ // - added by predOps:
+ // 2 - predicate reg
+ return MI.getOperand(2).getReg();
+ assert(MI.getOpcode() == ARM::ADJCALLSTACKUP ||
+ MI.getOpcode() == ARM::tADJCALLSTACKUP);
+ // Operands of ADJCALLSTACKUP:
+ // - argument declared in ADJCALLSTACKUP pattern:
+ // 0 - frame size
+ // 1 - arg of CALLSEQ_END
+ // 2 - predicate code
+ // - added by predOps:
+ // 3 - predicate reg
+ return MI.getOperand(3).getReg();
+ }
};
/// Get the operands corresponding to the given \p Pred value. By default, the
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 7a7b7fede7c8..bc7afdb7f1c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
-def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
- (sequence "R%u", 12, 1),
- (sequence "D%u", 31, 0))>;
+def CSR_iOS_TLSCall
+ : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12),
+ (sequence "D%u", 31, 0))>;
// C++ TLS access function saves all registers except SP. Try to match
// the order of CSRs in CSR_iOS.
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 23722f1b7f3f..6434df317aa8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() {
.add(MI->getOperand(1));
MI->eraseFromParent();
MadeChange = true;
- }
- if (MI->getOpcode() == ARM::tPUSH &&
- MI->getOperand(2).getReg() == ARM::LR &&
- MI->getNumExplicitOperands() == 3) {
+ } else if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
// Just remove the push.
MI->eraseFromParent();
MadeChange = true;
@@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// If we're in PIC mode, there should be another ADD following.
auto *TRI = STI->getRegisterInfo();
+
+ // %base cannot be redefined after the load as it will appear before
+ // TBB/TBH like:
+ // %base =
+ // %base =
+ // tBB %base, %idx
+ if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
+ continue;
+
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 01e062bd185c..e9bc7db66fa4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
// If we have integer div support we should have selected this automagically.
// In case we have a real miss go ahead and return false and we'll pick
// it up later.
- if (Subtarget->hasDivide()) return false;
+ if (Subtarget->hasDivideInThumbMode())
+ return false;
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 37be22bed540..70dbe1bc5b95 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
}
}
+/// We need the offset of the frame pointer relative to other MachineFrameInfo
+/// offsets which are encoded relative to SP at function begin.
+/// See also emitPrologue() for how the FP is set up.
+/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
+/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
+/// this to produce a conservative estimate that we check in an assert() later.
+static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+ // This is a conservative estimation: Assume the frame pointer being r7 and
+ // pc("r15") up to r8 getting spilled before (= 8 registers).
+ return -AFI.getArgRegsSaveSize() - (8 * 4);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush =
- MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset &&
+ "Max FP estimation is wrong");
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -1700,6 +1714,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+
+ // Determine biggest (positive) SP offset in MachineFrameInfo.
+ int MaxFixedOffset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
+ MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
+ }
+
bool HasFP = hasFP(MF);
if (HasFP) {
if (AFI->hasStackFrame())
@@ -1707,15 +1729,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += AFI->getArgumentStackSize();
+ EstimatedStackSize += MaxFixedOffset;
}
EstimatedStackSize += 16; // For possible paddings.
- bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI.hasVarSizedObjects() ||
- (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI);
+ bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
+ // For large argument stacks fp relative addressed may overflow.
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
bool ExtraCSSpill = false;
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ if (BigFrameOffsets ||
+ !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
if (HasFP) {
@@ -1899,7 +1926,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (BigStack && !ExtraCSSpill) {
+ if (BigFrameOffsets && !ExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign / 4;
@@ -1958,7 +1985,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1976,14 +2003,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
ARMCC::CondCodes Pred =
(PIdx == -1) ? ARMCC::AL
: (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
+ unsigned PredReg = TII.getFramePred(Old);
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old.getOperand(2).getReg();
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
Pred, PredReg);
} else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old.getOperand(3).getReg();
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
Pred, PredReg);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b07b4e1f5cfb..e9df9449103c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -228,11 +228,6 @@ private:
const uint16_t *DOpcodes,
const uint16_t *QOpcodes = nullptr);
- /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
- /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
- /// generated to force the table registers to be consecutive.
- void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
-
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue NewMulConst;
if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
HandleSDNode Handle(N);
+ SDLoc Loc(N);
replaceDAGValue(N.getOperand(1), NewMulConst);
BaseReg = Handle.getValue();
- Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
- PowerOfTwo),
- SDLoc(N), MVT::i32);
+ Opc = CurDAG->getTargetConstant(
+ ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
return true;
}
}
@@ -1859,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
return Opc; // If not one we handle, return it unchanged.
}
+/// Returns true if the given increment is a Constant known to be equal to the
+/// access size performed by a NEON load/store. This means the "[rN]!" form can
+/// be used.
+static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
+ auto C = dyn_cast<ConstantSDNode>(Inc);
+ return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
+}
+
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
@@ -1926,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if ((NumVecs <= 2) && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2080,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
else if (NumVecs > 2 && !isVSTfixed(Opc))
Ops.push_back(Reg0);
@@ -2214,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
SDValue SuperReg;
@@ -2318,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
else if (NumVecs > 2)
@@ -2356,39 +2364,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
- unsigned Opc) {
- assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- unsigned FirstTblReg = IsExt ? 2 : 1;
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- SDValue V0 = N->getOperand(FirstTblReg + 0);
- SDValue V1 = N->getOperand(FirstTblReg + 1);
- if (NumVecs == 2)
- RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
- else {
- SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
- // an undef.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- SmallVector<SDValue, 6> Ops;
- if (IsExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
- Ops.push_back(getAL(CurDAG, dl)); // predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
-}
-
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -3730,59 +3705,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
-
- case Intrinsic::arm_neon_vtbl2:
- SelectVTBL(N, false, 2, ARM::VTBL2);
- return;
- case Intrinsic::arm_neon_vtbl3:
- SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbl4:
- SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
- return;
-
- case Intrinsic::arm_neon_vtbx2:
- SelectVTBL(N, true, 2, ARM::VTBX2);
- return;
- case Intrinsic::arm_neon_vtbx3:
- SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbx4:
- SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
- return;
- }
- break;
- }
-
- case ARMISD::VTBL1: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
- return;
- }
- case ARMISD::VTBL2: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
-
- SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
- return;
- }
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e697c8ca5339..165e9b7378c7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -852,7 +852,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
@@ -860,7 +860,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
@@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
@@ -3347,6 +3347,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vtbl1:
+ return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::arm_neon_vtbl2:
+ return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
@@ -10867,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
- uint64_t IncVal = CInc->getZExtValue();
- if (IncVal != NumBytes)
- continue;
- } else if (NumBytes >= 3 * 16) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
@@ -11688,34 +11691,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
- APInt &KnownOne) {
- if (Op.getOpcode() == ARMISD::BFI) {
- // Conservatively, we can recurse down the first operand
- // and just mask out all affected bits.
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
-
- // The operand to BFI is already a mask suitable for removing the bits it
- // sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
- KnownZero &= Mask;
- KnownOne &= Mask;
- return;
- }
- if (Op.getOpcode() == ARMISD::CMOV) {
- APInt KZ2(KnownZero.getBitWidth(), 0);
- APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2);
-
- KnownZero &= KZ2;
- KnownOne &= KO2;
- return;
- }
- return DAG.computeKnownBits(Op, KnownZero, KnownOne);
-}
-
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
@@ -11777,7 +11752,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
APInt KnownZero, KnownOne;
- computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ DAG.computeKnownBits(Y, KnownZero, KnownOne);
if ((OrCI & KnownZero) != OrCI)
return SDValue();
@@ -12657,6 +12632,19 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
}
+ case ARMISD::BFI: {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ const APInt &Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
}
}
@@ -13052,7 +13040,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
- if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
+ : Subtarget->hasDivideInARMMode();
+ if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 70a0b1380ec9..8b54ce430ed2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -717,7 +717,7 @@ class InstrItineraryData;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index cc0e7d4d9c35..703e8071b177 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -259,8 +259,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+ AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 681e235d78f0..9b08c612e16b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>]>;
+def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
+def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
+def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
+
+
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -6443,7 +6451,8 @@ def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$Vd, $Vn, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+ [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
@@ -6498,6 +6507,49 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
+ (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vm)),
+ (v8i8 (VTBX2 v8i8:$orig,
+ (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBX3Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBX4Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f5b673b78ad7..f710ee6a7e77 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2797,7 +2797,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -2809,7 +2809,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 8d224d6a70fa..816596b85721 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -299,6 +299,20 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
+ case G_SUB:
+ I.setDesc(TII.get(ARM::SUBrr));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_MUL:
+ if (TII.getSubtarget().hasV6Ops()) {
+ I.setDesc(TII.get(ARM::MUL));
+ } else {
+ assert(TII.getSubtarget().useMulOps() && "Unsupported target");
+ I.setDesc(TII.get(ARM::MULv5));
+ MIB->getOperand(0).setIsEarlyClobber(true);
+ }
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
case G_FADD:
if (!selectFAdd(MIB, TII, MRI))
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 994bbd673dd8..fe9681439e6b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -43,8 +43,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, 1, p0}, Legal);
}
- for (auto Ty : {s1, s8, s16, s32})
- setAction({G_ADD, Ty}, Legal);
+ for (unsigned Op : {G_ADD, G_SUB, G_MUL})
+ for (auto Ty : {s1, s8, s16, s32})
+ setAction({Op, Ty}, Legal);
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 08f3da738868..e47bd3a8963e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -219,6 +219,8 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case G_ADD:
+ case G_SUB:
+ case G_MUL:
case G_SEXT:
case G_ZEXT:
case G_GEP:
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 40993fc0aa8a..d2630685d91b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -208,8 +208,8 @@ protected:
/// FP registers for VFPv3.
bool HasD16 = false;
- /// HasHardwareDivide - True if subtarget supports [su]div
- bool HasHardwareDivide = false;
+ /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
+ bool HasHardwareDivideInThumb = false;
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
@@ -507,7 +507,7 @@ public:
return hasNEON() && UseNEONForSinglePrecisionFP;
}
- bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f421d3ac1693..ada816c16389 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode(
clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
"Warn in ARM, emit implicit ITs in Thumb")));
+static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
+ cl::init(false));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -540,6 +543,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ // Add build attributes based on the selected target.
+ if (AddBuildAttributes)
+ getTargetStreamer().emitTargetAttributes(STI);
+
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
@@ -10189,8 +10196,8 @@ static const struct {
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
- {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
+ { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
+ {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6fa890ba1cd5..4d6c52f3cd49 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -464,7 +464,7 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo);
+ LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
auto LastMappingSymbol = LastMappingSymbols.find(Section);
if (LastMappingSymbol != LastMappingSymbols.end()) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 73e563890dd9..2b0cd461df7a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,9 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -75,3 +79,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+
+static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
+ if (STI.getCPU() == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::FeatureRClass))
+ return ARMBuildAttrs::v8_R;
+ return ARMBuildAttrs::v8_A;
+ } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+ return ARMBuildAttrs::v8_M_Main;
+ else if (STI.hasFeature(ARM::HasV7Ops)) {
+ if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
+ } else if (STI.hasFeature(ARM::HasV6T2Ops))
+ return ARMBuildAttrs::v6T2;
+ else if (STI.hasFeature(ARM::HasV8MBaselineOps))
+ return ARMBuildAttrs::v8_M_Base;
+ else if (STI.hasFeature(ARM::HasV6MOps))
+ return ARMBuildAttrs::v6S_M;
+ else if (STI.hasFeature(ARM::HasV6Ops))
+ return ARMBuildAttrs::v6;
+ else if (STI.hasFeature(ARM::HasV5TEOps))
+ return ARMBuildAttrs::v5TE;
+ else if (STI.hasFeature(ARM::HasV5TOps))
+ return ARMBuildAttrs::v5T;
+ else if (STI.hasFeature(ARM::HasV4TOps))
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
+
+static bool isV8M(const MCSubtargetInfo &STI) {
+ // Note that v8M Baseline is a subset of v6T2!
+ return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
+ !STI.hasFeature(ARM::HasV6T2Ops)) ||
+ STI.hasFeature(ARM::HasV8MMainlineOps);
+}
+
+/// Emit the build attributes that only depend on the hardware that we expect
+// /to be available, and not on the ABI, or any source-language choices.
+void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ switchVendor("aeabi");
+
+ const StringRef CPUString = STI.getCPU();
+ if (!CPUString.empty() && !CPUString.startswith("generic")) {
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.hasFeature(ARM::ProcKrait)) {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
+ STI.hasFeature(ARM::FeatureHWDivARM))
+ emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
+ } else {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
+ }
+
+ emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));
+
+ if (STI.hasFeature(ARM::FeatureAClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (STI.hasFeature(ARM::FeatureRClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (STI.hasFeature(ARM::FeatureMClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
+ }
+
+ emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
+ ? ARMBuildAttrs::Not_Allowed
+ : ARMBuildAttrs::Allowed);
+
+ if (isV8M(STI)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumbDerived);
+ } else if (STI.hasFeature(ARM::FeatureThumb2)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (STI.hasFeature(ARM::HasV4TOps)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ }
+
+ if (STI.hasFeature(ARM::FeatureNEON)) {
+ /* NEON is not exactly a VFP architecture, but GAS emit one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (STI.hasFeature(ARM::FeatureFPARMv8)) {
+ if (STI.hasFeature(ARM::FeatureCrypto))
+ emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
+ else
+ emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(ARM::FK_NEON_VFPV4);
+ else
+ emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
+ : ARM::FK_NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ STI.hasFeature(ARM::HasV8_1aOps)
+ ? ARMBuildAttrs::AllowNeonARMv8_1a
+ : ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (STI.hasFeature(ARM::FeatureFPARMv8))
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
+ : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
+ else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
+ : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
+ else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(
+ STI.hasFeature(ARM::FeatureD16)
+ // +d16
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)
+ : (STI.hasFeature(ARM::FeatureFP16)
+ ? ARM::FK_VFPV3_D16_FP16
+ : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3));
+ else if (STI.hasFeature(ARM::FeatureVFP2))
+ emitFPU(ARM::FK_VFPV2);
+ }
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
+
+ if (STI.hasFeature(ARM::FeatureFP16))
+ emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (STI.hasFeature(ARM::FeatureMP))
+ emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
+ // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
+ // It is not possible to produce DisallowDIV: if hwdiv is present in the base
+ // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
+ // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
+ // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+
+ if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
+ emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureStrictAlign))
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+ else
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureTrustZone) &&
+ STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasFeature(ARM::FeatureTrustZone))
+ emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
+ else if (STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index fc083b98395b..d0fd366ab9ed 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignTo(Amount, getStackAlignment());
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();