author:    Dimitry Andric <dim@FreeBSD.org>    2017-01-02 21:25:48 +0000
committer: Dimitry Andric <dim@FreeBSD.org>    2017-01-02 21:25:48 +0000
commit:    d88c1a5a572cdb661c111098831fa526e933756f (patch)
tree:      97b32c3372106ac47ded3d1a99f9c023a8530073 /contrib/llvm/lib/Target/ARM
parent:    715652a404ee99f10c09c0a5edbb5883961b8c25 (diff)
parent:    b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
Update llvm to trunk r290819 and resolve conflicts.
Notes:
svn path=/projects/clang400-import/; revision=311142
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
69 files changed, 5669 insertions, 1980 deletions
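Editor's note (not part of the patch): most of the churn below tracks upstream API renames in LLVM trunk r290819 — getPassName() now returns StringRef instead of const char *, RemoveBranch/InsertBranch/ReverseBranchCondition become removeBranch/insertBranch/reverseBranchCondition (the first two gaining optional byte-count out-parameters), and GetInstSizeInBytes becomes the getInstSizeInBytes override. A minimal sketch of what a machine-function pass looks like after the getPassName change, assuming an LLVM 4.0-era tree (the pass class and name here are hypothetical, for illustration only):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"

    namespace {
    // Hypothetical pass, illustrating the r290819-era signature.
    class ExamplePass : public llvm::MachineFunctionPass {
    public:
      static char ID;
      ExamplePass() : llvm::MachineFunctionPass(ID) {}

      // Pre-r290819:  const char *getPassName() const override;
      // Post-r290819: a StringRef is returned by value instead.
      llvm::StringRef getPassName() const override { return "Example pass"; }

      bool runOnMachineFunction(llvm::MachineFunction &MF) override {
        return false; // Analysis only; the function is left unmodified.
      }
    };
    char ExamplePass::ID = 0;
    } // end anonymous namespace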
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 9228cc2d7a9c..89859ba063d9 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -52,9 +52,7 @@ namespace {
     bool runOnMachineFunction(MachineFunction &Fn) override;
 
-    const char *getPassName() const override {
-      return "ARM A15 S->D optimizer";
-    }
+    StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
 
   private:
     const ARMBaseInstrInfo *TII;
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 690ff86a0c86..be3048252bbc 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -16,6 +16,7 @@
 #define LLVM_LIB_TARGET_ARM_ARM_H
 
 #include "llvm/Support/CodeGen.h"
+#include "ARMBasicBlockInfo.h"
 #include <functional>
 
 namespace llvm {
@@ -46,6 +47,10 @@ FunctionPass *createThumb2SizeReductionPass(
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
+void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
+                      BasicBlockInfo &BBI);
+std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
+
 void initializeARMLoadStoreOptPass(PassRegistry &);
 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef626b66a1e7..2a090faeee6a 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
 // Not to be confused with FeatureHasRetAddrStack (return address stack)
 def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
                  "Enable Reliability, Availability and Serviceability extensions">;
+def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
+                  "Enable fast computation of positive address offsets">;
 
 // Cyclone has preferred instructions for zeroing VFP registers, which can
@@ -295,7 +297,8 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
                                 FeatureV7Clrex]>;
 def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
                                 "Support ARM v8 instructions",
-                                [HasV7Ops, FeatureAcquireRelease]>;
+                                [HasV7Ops, FeatureAcquireRelease,
+                                 FeatureT2XtPk]>;
 def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
                                    "Support ARM v8.1a instructions",
                                    [HasV8Ops]>;
@@ -352,6 +355,8 @@ def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
                               "Cortex-R5 ARM processors", []>;
 def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
                               "Cortex-R7 ARM processors", []>;
+def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
+                               "Cortex-R52 ARM processors", []>;
 
 def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
                               "Cortex-M3 ARM processors", []>;
@@ -388,7 +393,8 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
 def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
 
 def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
-                                                  FeatureDSP]>;
+                                                  FeatureDSP,
+                                                  FeatureT2XtPk]>;
 
 def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
 
@@ -409,13 +415,15 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
                                                 FeatureNEON,
                                                 FeatureDB,
                                                 FeatureDSP,
-                                                FeatureAClass]>;
+                                                FeatureAClass,
+                                                FeatureT2XtPk]>;
 
 def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
                                                 FeatureDB,
                                                 FeatureDSP,
                                                 FeatureHWDiv,
-                                                FeatureRClass]>;
+                                                FeatureRClass,
+                                                FeatureT2XtPk]>;
 
 def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
                                                 FeatureThumb2,
@@ -470,6 +478,19 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
                                                     FeatureCRC,
                                                     FeatureRAS]>;
+def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
+                                                FeatureRClass,
+                                                FeatureDB,
+                                                FeatureHWDiv,
+                                                FeatureHWDivARM,
+                                                FeatureT2XtPk,
+                                                FeatureDSP,
+                                                FeatureCRC,
+                                                FeatureMP,
+                                                FeatureVirtualization,
+                                                FeatureFPARMv8,
+                                                FeatureNEON]>;
+
 def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
                                   [HasV8MBaselineOps,
                                    FeatureNoARM,
@@ -570,7 +591,6 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHasSlowFPVMLx,
                                                   FeatureVMLxForwarding,
-                                                  FeatureT2XtPk,
                                                   FeatureMP,
                                                   FeatureVFP4]>;
 
@@ -581,7 +601,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
                                                   FeatureHasVMLxHazards,
                                                   FeatureHasSlowFPVMLx,
                                                   FeatureVMLxForwarding,
-                                                  FeatureT2XtPk,
                                                   FeatureMP,
                                                   FeatureVFP4,
                                                   FeatureHWDiv,
@@ -595,15 +614,13 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHasVMLxHazards,
                                                   FeatureHasSlowFPVMLx,
-                                                  FeatureVMLxForwarding,
-                                                  FeatureT2XtPk]>;
+                                                  FeatureVMLxForwarding]>;
 
 def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
                                                   FeatureHasRetAddrStack,
                                                   FeatureTrustZone,
                                                   FeatureHasVMLxHazards,
                                                   FeatureVMLxForwarding,
-                                                  FeatureT2XtPk,
                                                   FeatureFP16,
                                                   FeatureAvoidPartialCPSR,
                                                   FeatureExpandMLx,
@@ -618,7 +635,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
                                                    FeatureHasRetAddrStack,
                                                    FeatureTrustZone,
                                                    FeatureVMLxForwarding,
-                                                   FeatureT2XtPk,
                                                    FeatureVFP4,
                                                    FeatureHWDiv,
                                                    FeatureHWDivARM,
@@ -632,7 +648,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
                                                    FeatureHasRetAddrStack,
                                                    FeatureMuxedUnits,
                                                    FeatureTrustZone,
-                                                   FeatureT2XtPk,
                                                    FeatureVFP4,
                                                    FeatureMP,
                                                    FeatureCheckVLDnAlign,
@@ -647,7 +662,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
                                                    FeatureTrustZone,
                                                    FeatureMP,
                                                    FeatureVMLxForwarding,
-                                                   FeatureT2XtPk,
                                                    FeatureVFP4,
                                                    FeatureHWDiv,
                                                    FeatureHWDivARM,
@@ -662,7 +676,6 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
                                               FeatureMuxedUnits,
                                               FeatureCheckVLDnAlign,
                                               FeatureVMLxForwarding,
-                                              FeatureT2XtPk,
                                               FeatureFP16,
                                               FeatureAvoidPartialCPSR,
                                               FeatureVFP4,
@@ -672,7 +685,6 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
                                            FeatureHasRetAddrStack,
                                            FeatureNEONForFP,
-                                           FeatureT2XtPk,
                                            FeatureVFP4,
                                            FeatureMP,
                                            FeatureHWDiv,
@@ -691,8 +703,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
 // FIXME: R4 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
                                                   FeatureHasRetAddrStack,
-                                                  FeatureAvoidPartialCPSR,
-                                                  FeatureT2XtPk]>;
+                                                  FeatureAvoidPartialCPSR]>;
 
 // FIXME: R4F has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
@@ -701,8 +712,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
                                                    FeatureHasSlowFPVMLx,
                                                    FeatureVFP3,
                                                    FeatureD16,
-                                                   FeatureAvoidPartialCPSR,
-                                                   FeatureT2XtPk]>;
+                                                   FeatureAvoidPartialCPSR]>;
 
 // FIXME: R5 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
@@ -712,8 +722,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHWDivARM,
                                                   FeatureHasSlowFPVMLx,
-                                                  FeatureAvoidPartialCPSR,
-                                                  FeatureT2XtPk]>;
+                                                  FeatureAvoidPartialCPSR]>;
 
 // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
 def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
@@ -725,8 +734,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHWDivARM,
                                                   FeatureHasSlowFPVMLx,
-                                                  FeatureAvoidPartialCPSR,
-                                                  FeatureT2XtPk]>;
+                                                  FeatureAvoidPartialCPSR]>;
 
 def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
                                                   FeatureHasRetAddrStack,
@@ -737,8 +745,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHWDivARM,
                                                   FeatureHasSlowFPVMLx,
-                                                  FeatureAvoidPartialCPSR,
-                                                  FeatureT2XtPk]>;
+                                                  FeatureAvoidPartialCPSR]>;
 
 def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
 def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
@@ -755,42 +762,38 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
 def : ProcNoItin<"cortex-a32", [ARMv8a,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
                                 FeatureCRC]>;
 
 def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
                                 FeatureCRC]>;
 
 def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
-                                FeatureCRC]>;
+                                FeatureCRC,
+                                FeatureFPAO]>;
 
 def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
-                                FeatureCRC]>;
+                                FeatureCRC,
+                                FeatureFPAO]>;
 
 def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
                                 FeatureCRC]>;
 
 def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
                                 FeatureHWDiv,
                                 FeatureHWDivARM,
-                                FeatureT2XtPk,
                                 FeatureCrypto,
                                 FeatureCRC]>;
 
@@ -798,7 +801,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
 def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
                                              FeatureHasRetAddrStack,
                                              FeatureNEONForFP,
-                                             FeatureT2XtPk,
                                              FeatureVFP4,
                                              FeatureMP,
                                              FeatureHWDiv,
@@ -812,10 +814,24 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
 def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
                                FeatureHWDiv,
                                FeatureHWDivARM,
-                               FeatureT2XtPk,
                                FeatureCrypto,
                                FeatureCRC]>;
 
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
+                               FeatureHWDiv,
+                               FeatureHWDivARM,
+                               FeatureCrypto,
+                               FeatureCRC]>;
+
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
+                               FeatureHWDiv,
+                               FeatureHWDivARM,
+                               FeatureCrypto,
+                               FeatureCRC]>;
+
+def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
+                                                    FeatureFPAO]>;
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 04863a7ecf8f..f20768ab77a5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -74,8 +74,9 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
   if (AFI->isThumbFunction()) {
     OutStreamer->EmitAssemblerFlag(MCAF_Code16);
     OutStreamer->EmitThumbFunc(CurrentFnSym);
+  } else {
+    OutStreamer->EmitAssemblerFlag(MCAF_Code32);
   }
-
   OutStreamer->EmitLabel(CurrentFnSym);
 }
 
@@ -96,6 +97,13 @@ void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
   OutStreamer->EmitValue(E, Size);
 }
 
+void ARMAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  if (PromotedGlobals.count(GV))
+    // The global was promoted into a constant pool. It should not be emitted.
+    return;
+  AsmPrinter::EmitGlobalVariable(GV);
+}
+
 /// runOnMachineFunction - This uses the EmitInstruction()
 /// method to print assembly for each instruction.
 ///
@@ -108,6 +116,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   const Function* F = MF.getFunction();
   const TargetMachine& TM = MF.getTarget();
 
+  // Collect all globals that had their storage promoted to a constant pool.
+  // Functions are emitted before variables, so this accumulates promoted
+  // globals from all functions in PromotedGlobals.
+  for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
+    PromotedGlobals.insert(GV);
+
   // Calculate this function's optimization goal.
   unsigned OptimizationGoal;
   if (F->hasFnAttribute(Attribute::OptimizeNone))
@@ -150,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit the rest of the function body.
   EmitFunctionBody();
 
+  // Emit the XRay table for this function.
+  EmitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
@@ -215,6 +232,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_ConstantPoolIndex:
+    if (Subtarget->genExecuteOnly())
+      llvm_unreachable("execute-only should not generate constant pools");
     GetCPISymbol(MO.getIndex())->print(O, MAI);
     break;
   }
@@ -249,7 +268,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
         << "]";
       return false;
     }
-    // Fallthrough
+    LLVM_FALLTHROUGH;
   case 'c': // Don't print "#" before an immediate operand.
     if (!MI->getOperand(OpNum).isImm())
       return true;
@@ -542,11 +561,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
     raw_string_ostream OS(Flags);
 
     for (const auto &Function : M)
-      TLOF.emitLinkerFlagsForGlobal(OS, &Function, *Mang);
+      TLOF.emitLinkerFlagsForGlobal(OS, &Function);
     for (const auto &Global : M.globals())
-      TLOF.emitLinkerFlagsForGlobal(OS, &Global, *Mang);
+      TLOF.emitLinkerFlagsForGlobal(OS, &Global);
     for (const auto &Alias : M.aliases())
-      TLOF.emitLinkerFlagsForGlobal(OS, &Alias, *Mang);
+      TLOF.emitLinkerFlagsForGlobal(OS, &Alias);
 
     OS.flush();
@@ -588,9 +607,11 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
   if (CPU == "xscale")
     return ARMBuildAttrs::v5TEJ;
 
-  if (Subtarget->hasV8Ops())
+  if (Subtarget->hasV8Ops()) {
+    if (Subtarget->isRClass())
+      return ARMBuildAttrs::v8_R;
     return ARMBuildAttrs::v8_A;
-  else if (Subtarget->hasV8MMainlineOps())
+  } else if (Subtarget->hasV8MMainlineOps())
     return ARMBuildAttrs::v8_M_Main;
   else if (Subtarget->hasV7Ops()) {
     if (Subtarget->isMClass() && Subtarget->hasDSP())
@@ -614,6 +635,15 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
   return ARMBuildAttrs::v4;
 }
 
+// Returns true if all functions have the same function attribute value.
+// It also returns true when the module has no functions.
+static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
+                                               StringRef Value) {
+  return !any_of(M, [&](const Function &F) {
+    return F.getFnAttribute(Attr).getValueAsString() != Value;
+  });
+}
+
 void ARMAsmPrinter::emitAttributes() {
   MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
   ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -725,31 +755,48 @@ void ARMAsmPrinter::emitAttributes() {
       ATS.emitFPU(ARM::FK_VFPV2);
   }
 
+  // RW data addressing.
   if (isPositionIndependent()) {
-    // PIC specific attributes.
     ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
                       ARMBuildAttrs::AddressRWPCRel);
+  } else if (STI.isRWPI()) {
+    // RWPI specific attributes.
+    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+                      ARMBuildAttrs::AddressRWSBRel);
+  }
+
+  // RO data addressing.
+  if (isPositionIndependent() || STI.isROPI()) {
     ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
                       ARMBuildAttrs::AddressROPCRel);
+  }
+
+  // GOT use.
+  if (isPositionIndependent()) {
     ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
                       ARMBuildAttrs::AddressGOT);
   } else {
-    // Allow direct addressing of imported data for all other relocation models.
     ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
                       ARMBuildAttrs::AddressDirect);
   }
 
-  // Signal various FP modes.
-  if (!TM.Options.UnsafeFPMath) {
+  // Set FP Denormals.
+  if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+                                         "denormal-fp-math",
+                                         "preserve-sign") ||
+      TM.Options.FPDenormalMode == FPDenormal::PreserveSign)
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+                      ARMBuildAttrs::PreserveFPSign);
+  else if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+                                              "denormal-fp-math",
+                                              "positive-zero") ||
+           TM.Options.FPDenormalMode == FPDenormal::PositiveZero)
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+                      ARMBuildAttrs::PositiveZero);
+  else if (!TM.Options.UnsafeFPMath)
     ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                       ARMBuildAttrs::IEEEDenormals);
-    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
-
-    // If the user has permitted this code to choose the IEEE 754
-    // rounding at run-time, emit the rounding attribute.
-    if (TM.Options.HonorSignDependentRoundingFPMathOption)
-      ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
-  } else {
+  else {
     if (!STI.hasVFP2()) {
       // When the target doesn't have an FPU (by design or
       // intention), the assumptions made on the software support
@@ -775,6 +822,21 @@ void ARMAsmPrinter::emitAttributes() {
     // absence of its emission implies zero).
   }
 
+  // Set FP exceptions and rounding
+  if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+                                         "no-trapping-math", "true") ||
+      TM.Options.NoTrappingFPMath)
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+                      ARMBuildAttrs::Not_Allowed);
+  else if (!TM.Options.UnsafeFPMath) {
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
+
+    // If the user has permitted this code to choose the IEEE 754
+    // rounding at run-time, emit the rounding attribute.
+    if (TM.Options.HonorSignDependentRoundingFPMathOption)
+      ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
+  }
+
   // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
   // equivalent of GCC's -ffinite-math-only flag.
   if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
@@ -858,14 +920,16 @@ void ARMAsmPrinter::emitAttributes() {
     }
   }
 
-  // TODO: We currently only support either reserving the register, or treating
-  // it as another callee-saved register, but not as SB or a TLS pointer; It
-  // would instead be nicer to push this from the frontend as metadata, as we do
-  // for the wchar and enum size tags
-  if (STI.isR9Reserved())
-    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
+  // We currently do not support using R9 as the TLS pointer.
+  if (STI.isRWPI())
+    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+                      ARMBuildAttrs::R9IsSB);
+  else if (STI.isR9Reserved())
+    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+                      ARMBuildAttrs::R9Reserved);
   else
-    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+    ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+                      ARMBuildAttrs::R9IsGPR);
 
   if (STI.hasTrustZone() && STI.hasVirtualization())
     ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
@@ -880,7 +944,7 @@ void ARMAsmPrinter::emitAttributes() {
 
 //===----------------------------------------------------------------------===//
 
-static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
                              unsigned LabelId, MCContext &Ctx) {
   MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
@@ -899,6 +963,8 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
     return MCSymbolRefExpr::VK_TPOFF;
   case ARMCP::GOTTPOFF:
     return MCSymbolRefExpr::VK_GOTTPOFF;
+  case ARMCP::SBREL:
+    return MCSymbolRefExpr::VK_ARM_SBREL;
   case ARMCP::GOT_PREL:
     return MCSymbolRefExpr::VK_ARM_GOT_PREL;
   case ARMCP::SECREL:
@@ -954,6 +1020,26 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
   ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
 
+  if (ACPV->isPromotedGlobal()) {
+    // This constant pool entry is actually a global whose storage has been
+    // promoted into the constant pool. This global may be referenced still
+    // by debug information, and due to the way AsmPrinter is set up, the debug
+    // info is immutable by the time we decide to promote globals to constant
+    // pools. Because of this, we need to ensure we emit a symbol for the global
+    // with private linkage (the default) so debug info can refer to it.
+    //
+    // However, if this global is promoted into several functions we must ensure
+    // we don't try and emit duplicate symbols!
+    auto *ACPC = cast<ARMConstantPoolConstant>(ACPV);
+    auto *GV = ACPC->getPromotedGlobal();
+    if (!EmittedPromotedGlobalLabels.count(GV)) {
+      MCSymbol *GVSym = getSymbol(GV);
+      OutStreamer->EmitLabel(GVSym);
+      EmittedPromotedGlobalLabels.insert(GV);
+    }
+    return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit());
+  }
+
   MCSymbol *MCSym;
   if (ACPV->isLSDA()) {
     MCSym = getCurExceptionSym();
@@ -973,7 +1059,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
     MCSym = MBB->getSymbol();
   } else {
     assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
-    const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+    auto Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
     MCSym = GetExternalSymbolSymbol(Sym);
   }
 
@@ -1037,7 +1123,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
     // .word (LBB1 - LJTI_0_0)
     const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
-    if (isPositionIndependent())
+    if (isPositionIndependent() || Subtarget->isROPI())
       Expr = MCBinaryExpr::createSub(Expr,
                                      MCSymbolRefExpr::create(JTISymbol, OutContext),
                                      OutContext);
@@ -1082,6 +1168,9 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
   const MachineOperand &MO1 = MI->getOperand(1);
   unsigned JTI = MO1.getIndex();
 
+  if (Subtarget->isThumb1Only())
+    EmitAlignment(2);
+
   MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
   OutStreamer->EmitLabel(JTISymbol);
 
@@ -1628,6 +1717,91 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(0));
     return;
   }
+  case ARM::tTBB_JT:
+  case ARM::tTBH_JT: {
+
+    bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
+    unsigned Base = MI->getOperand(0).getReg();
+    unsigned Idx = MI->getOperand(1).getReg();
+    assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
+
+    // Multiply up idx if necessary.
+    if (!Is8Bit)
+      EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+                                       .addReg(Idx)
+                                       .addReg(ARM::CPSR)
+                                       .addReg(Idx)
+                                       .addImm(1)
+                                       // Add predicate operands.
+                                       .addImm(ARMCC::AL)
+                                       .addReg(0));
+
+    if (Base == ARM::PC) {
+      // TBB [base, idx] =
+      //    ADDS idx, idx, base
+      //    LDRB idx, [idx, #4] ; or LDRH if TBH
+      //    LSLS idx, #1
+      //    ADDS pc, pc, idx
+
+      // When using PC as the base, it's important that there is no padding
+      // between the last ADDS and the start of the jump table. The jump table
+      // is 4-byte aligned, so we ensure we're 4 byte aligned here too.
+      //
+      // FIXME: Ideally we could vary the LDRB index based on the padding
+      // between the sequence and jump table, however that relies on MCExprs
+      // for load indexes which are currently not supported.
+      OutStreamer->EmitCodeAlignment(4);
+      EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+                                       .addReg(Idx)
+                                       .addReg(Idx)
+                                       .addReg(Base)
+                                       // Add predicate operands.
+                                       .addImm(ARMCC::AL)
+                                       .addReg(0));
+
+      unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+                                       .addReg(Idx)
+                                       .addReg(Idx)
+                                       .addImm(Is8Bit ? 4 : 2)
+                                       // Add predicate operands.
+                                       .addImm(ARMCC::AL)
+                                       .addReg(0));
+    } else {
+      // TBB [base, idx] =
+      //    LDRB idx, [base, idx] ; or LDRH if TBH
+      //    LSLS idx, #1
+      //    ADDS pc, pc, idx
+
+      unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+                                       .addReg(Idx)
+                                       .addReg(Base)
+                                       .addReg(Idx)
+                                       // Add predicate operands.
+                                       .addImm(ARMCC::AL)
+                                       .addReg(0));
+    }
+
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+                                     .addReg(Idx)
+                                     .addReg(ARM::CPSR)
+                                     .addReg(Idx)
+                                     .addImm(1)
+                                     // Add predicate operands.
+                                     .addImm(ARMCC::AL)
+                                     .addReg(0));
+
+    OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+                                     .addReg(ARM::PC)
+                                     .addReg(ARM::PC)
+                                     .addReg(Idx)
+                                     // Add predicate operands.
+                                     .addImm(ARMCC::AL)
+                                     .addReg(0));
+    return;
+  }
   case ARM::tBR_JTr:
   case ARM::BR_JTr: {
     // Lower and emit the instruction itself, then the jump table following it.
@@ -1961,6 +2135,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(0));
     return;
   }
+  case ARM::PATCHABLE_FUNCTION_ENTER:
+    LowerPATCHABLE_FUNCTION_ENTER(*MI);
+    return;
+  case ARM::PATCHABLE_FUNCTION_EXIT:
+    LowerPATCHABLE_FUNCTION_EXIT(*MI);
+    return;
+  case ARM::PATCHABLE_TAIL_CALL:
+    LowerPATCHABLE_TAIL_CALL(*MI);
+    return;
   }
 
   MCInst TmpInst;
@@ -1975,8 +2158,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
 // Force static initialization.
 extern "C" void LLVMInitializeARMAsmPrinter() {
-  RegisterAsmPrinter<ARMAsmPrinter> X(TheARMLETarget);
-  RegisterAsmPrinter<ARMAsmPrinter> Y(TheARMBETarget);
-  RegisterAsmPrinter<ARMAsmPrinter> A(TheThumbLETarget);
-  RegisterAsmPrinter<ARMAsmPrinter> B(TheThumbBETarget);
+  RegisterAsmPrinter<ARMAsmPrinter> X(getTheARMLETarget());
+  RegisterAsmPrinter<ARMAsmPrinter> Y(getTheARMBETarget());
+  RegisterAsmPrinter<ARMAsmPrinter> A(getTheThumbLETarget());
+  RegisterAsmPrinter<ARMAsmPrinter> B(getTheThumbBETarget());
 }
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 97f5ca0ecbc2..ce0b04d56d9e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -56,12 +56,22 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
   /// -1 if uninitialized, 0 if conflicting goals
   int OptimizationGoals;
 
+  /// List of globals that have had their storage promoted to a constant
+  /// pool. This lives between calls to runOnMachineFunction and collects
+  /// data from every MachineFunction. It is used during doFinalization
+  /// when all non-function globals are emitted.
+  SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+  /// Set of globals in PromotedGlobals that we've emitted labels for.
+  /// We need to emit labels even for promoted globals so that DWARF
+  /// debug info can link properly.
+  SmallPtrSet<const GlobalVariable*,2> EmittedPromotedGlobalLabels;
+
 public:
   explicit ARMAsmPrinter(TargetMachine &TM,
                          std::unique_ptr<MCStreamer> Streamer);
 
-  const char *getPassName() const override {
-    return "ARM Assembly / Object Emitter";
+  StringRef getPassName() const override {
+    return "ARM Assembly Printer";
   }
 
   void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
@@ -90,11 +100,25 @@ public:
   void EmitStartOfAsmFile(Module &M) override;
   void EmitEndOfAsmFile(Module &M) override;
   void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
-
+  void EmitGlobalVariable(const GlobalVariable *GV) override;
+
   // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
 
+  //===------------------------------------------------------------------===//
+  // XRay implementation
+  //===------------------------------------------------------------------===//
+public:
+  // XRay-specific lowering for ARM.
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+  void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+  // Helper function that emits the XRay sleds we've collected for a particular
+  // function.
+  void EmitXRayTable();
+
+private:
+  void EmitSled(const MachineInstr &MI, SledKind Kind);
+
   // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
   void emitAttributes();
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 693f16499717..70a3246e34f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -382,7 +382,10 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }
 
-unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                        int *BytesRemoved) const {
+  assert(!BytesRemoved && "code size not handled");
+
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
   if (I == MBB.end())
     return 0;
@@ -406,11 +409,13 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
-unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                         MachineBasicBlock *TBB,
                                         MachineBasicBlock *FBB,
                                         ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL) const {
+                                        const DebugLoc &DL,
+                                        int *BytesAdded) const {
+  assert(!BytesAdded && "code size not handled");
   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
   int BOpc   = !AFI->isThumbFunction()
     ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -419,7 +424,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
   bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
 
   // Shouldn't be a fall through.
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "ARM branch conditions have two components!");
 
@@ -448,7 +453,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
 }
 
 bool ARMBaseInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
   Cond[0].setImm(ARMCC::getOppositeCondition(CC));
   return false;
@@ -575,6 +580,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
   if (!MI.isPredicable())
     return false;
 
+  if (MI.isBundle())
+    return false;
+
   if (!isEligibleForITBlock(&MI))
     return false;
 
@@ -610,7 +618,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
 
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   const MachineBasicBlock &MBB = *MI.getParent();
   const MachineFunction *MF = MBB.getParent();
   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -669,7 +677,7 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
   while (++I != E && I->isInsideBundle()) {
     assert(!I->isBundle() && "No nested bundle!");
-    Size += GetInstSizeInBytes(*I);
+    Size += getInstSizeInBytes(*I);
   }
   return Size;
 }
@@ -868,7 +876,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
 
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -1051,7 +1059,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
@@ -2069,29 +2077,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
   int RegListIdx = IsT1PushPop ? 2 : 4;
 
   // Calculate the space we'll need in terms of registers.
-  unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
-  unsigned RD0Reg, RegsNeeded;
+  unsigned RegsNeeded;
+  const TargetRegisterClass *RegClass;
   if (IsVFPPushPop) {
-    RD0Reg = ARM::D0;
     RegsNeeded = NumBytes / 8;
+    RegClass = &ARM::DPRRegClass;
   } else {
-    RD0Reg = ARM::R0;
     RegsNeeded = NumBytes / 4;
+    RegClass = &ARM::GPRRegClass;
   }
 
   // We're going to have to strip all list operands off before
   // re-adding them since the order matters, so save the existing ones
   // for later.
   SmallVector<MachineOperand, 4> RegList;
-  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
-    RegList.push_back(MI->getOperand(i));
+
+  // We're also going to need the first register transferred by this
+  // instruction, which won't necessarily be the first register in the list.
+  unsigned FirstRegEnc = -1;
 
   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
+    MachineOperand &MO = MI->getOperand(i);
+    RegList.push_back(MO);
+
+    if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
+      FirstRegEnc = TRI->getEncodingValue(MO.getReg());
+  }
+
   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 
   // Now try to find enough space in the reglist to allocate NumBytes.
-  for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
-       --CurReg) {
+  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
+       --CurRegEnc) {
+    unsigned CurReg = RegClass->getRegister(CurRegEnc);
     if (!IsPop) {
       // Pushing any register is completely harmless, mark the
       // register involved as undef since we don't care about it in
@@ -2291,6 +2310,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
   default: break;
   case ARM::CMPri:
   case ARM::t2CMPri:
+  case ARM::tCMPi8:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
@@ -2477,8 +2497,21 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
   if (isPredicated(*MI))
     return false;
 
+  bool IsThumb1 = false;
   switch (MI->getOpcode()) {
   default: break;
+  case ARM::tLSLri:
+  case ARM::tLSRri:
+  case ARM::tLSLrr:
+  case ARM::tLSRrr:
+  case ARM::tSUBrr:
+  case ARM::tADDrr:
+  case ARM::tADDi3:
+  case ARM::tADDi8:
+  case ARM::tSUBi3:
+  case ARM::tSUBi8:
+    IsThumb1 = true;
+    LLVM_FALLTHROUGH;
   case ARM::RSBrr:
   case ARM::RSBri:
   case ARM::RSCrr:
@@ -2511,7 +2544,11 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
   case ARM::EORrr:
   case ARM::EORri:
   case ARM::t2EORrr:
-  case ARM::t2EORri: {
+  case ARM::t2EORri:
+  case ARM::t2LSRri:
+  case ARM::t2LSRrr:
+  case ARM::t2LSLri:
+  case ARM::t2LSLrr: {
     // Scan forward for the use of CPSR
     // When checking against MI: if it's a conditional code that requires
    // checking of the V bit or C bit, then this is not safe to do.
@@ -2618,9 +2655,12 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
       return false;
   }
 
-  // Toggle the optional operand to CPSR.
-  MI->getOperand(5).setReg(ARM::CPSR);
-  MI->getOperand(5).setIsDef(true);
+  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+  // set CPSR so this is represented as an explicit output)
+  if (!IsThumb1) {
+    MI->getOperand(5).setReg(ARM::CPSR);
+    MI->getOperand(5).setIsDef(true);
+  }
   assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
 
   CmpInstr.eraseFromParent();
@@ -2632,7 +2672,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
       return true;
     }
   }
-  
+
   return false;
 }
 
@@ -4119,6 +4159,9 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
                                                 unsigned LoadImmOpc,
                                                 unsigned LoadOpc) const {
+  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
+         "ROPI/RWPI not currently supported with stack guard");
+
   MachineBasicBlock &MBB = *MI->getParent();
   DebugLoc DL = MI->getDebugLoc();
   unsigned Reg = MI->getOperand(0).getReg();
@@ -4132,7 +4175,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
   if (Subtarget.isGVIndirectSymbol(GV)) {
     MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
     MIB.addReg(Reg, RegState::Kill).addImm(0);
-    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+    auto Flags = MachineMemOperand::MOLoad |
+                 MachineMemOperand::MODereferenceable |
+                 MachineMemOperand::MOInvariant;
     MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
         MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
     MIB.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 52b0ff17dea2..b01d5c8ec85f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -100,6 +100,10 @@ public:
   // Return whether the target has an explicit NOP encoding.
   bool hasNOP() const;
 
+  virtual void getNoopForElfTarget(MCInst &NopInst) const {
+    getNoopForMachoTarget(NopInst);
+  }
+
   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -124,13 +128,15 @@ public:
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
-  unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
-  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
-                        const DebugLoc &DL) const override;
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
   bool
-  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
 
   // Predication support.
   bool isPredicated(const MachineInstr &MI) const override;
@@ -154,7 +160,7 @@ public:
 
   /// GetInstSize - Returns the size of the specified MachineInstr.
   ///
-  virtual unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
 
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index aa968efc37d4..d995c631dd1c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -49,18 +49,13 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
     : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
 
 static unsigned getFramePointerReg(const ARMSubtarget &STI) {
-  if (STI.isTargetMachO())
-    return ARM::R7;
-  else if (STI.isTargetWindows())
-    return ARM::R11;
-  else // ARM EABI
-    return STI.isThumb() ? ARM::R7 : ARM::R11;
+  return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
 }
 
 const MCPhysReg*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
-  bool UseSplitPush = STI.splitFramePushPop();
+  bool UseSplitPush = STI.splitFramePushPop(*MF);
   const MCPhysReg *RegList =
       STI.isTargetDarwin()
           ? CSR_iOS_SaveList
@@ -136,6 +131,15 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
   return CSR_iOS_TLSCall_RegMask;
 }
 
+const uint32_t *
+ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+    return CSR_NoRegs_RegMask;
+  else
+    return CSR_FPRegs_RegMask;
+}
+
 const uint32_t *
 ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
@@ -163,27 +167,29 @@ getReservedRegs(const MachineFunction &MF) const {
   // FIXME: avoid re-calculating this every time.
   BitVector Reserved(getNumRegs());
-  Reserved.set(ARM::SP);
-  Reserved.set(ARM::PC);
-  Reserved.set(ARM::FPSCR);
-  Reserved.set(ARM::APSR_NZCV);
+  markSuperRegs(Reserved, ARM::SP);
+  markSuperRegs(Reserved, ARM::PC);
+  markSuperRegs(Reserved, ARM::FPSCR);
+  markSuperRegs(Reserved, ARM::APSR_NZCV);
   if (TFI->hasFP(MF))
-    Reserved.set(getFramePointerReg(STI));
+    markSuperRegs(Reserved, getFramePointerReg(STI));
   if (hasBasePointer(MF))
-    Reserved.set(BasePtr);
+    markSuperRegs(Reserved, BasePtr);
   // Some targets reserve R9.
   if (STI.isR9Reserved())
-    Reserved.set(ARM::R9);
+    markSuperRegs(Reserved, ARM::R9);
   // Reserve D16-D31 if the subtarget doesn't support them.
   if (!STI.hasVFP3() || STI.hasD16()) {
     static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
-    Reserved.set(ARM::D16, ARM::D31 + 1);
+    for (unsigned R = 0; R < 16; ++R)
+      markSuperRegs(Reserved, ARM::D16 + R);
   }
   const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
   for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
     for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
-      if (Reserved.test(*SI)) Reserved.set(*I);
+      if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+
+  assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
@@ -289,8 +295,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
   }
 
   // First prefer the paired physreg.
-  if (PairedPhys &&
-      std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+  if (PairedPhys && is_contained(Order, PairedPhys))
     Hints.push_back(PairedPhys);
 
   // Then prefer even or odd registers.
@@ -332,7 +337,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
 }
 
 bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ARMFrameLowering *TFI = getFrameLowering(MF);
 
@@ -347,14 +352,14 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   // It's going to be better to use the SP or Base Pointer instead. When there
   // are variable sized objects, we can't reference off of the SP, so we
   // reserve a Base Pointer.
-  if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+  if (AFI->isThumbFunction() && MFI.hasVarSizedObjects()) {
     // Conservatively estimate whether the negative offset from the frame
     // pointer will be sufficient to reach. If a function has a smallish
     // frame, it's less likely to have lots of spills and callee saved
     // space, so it's all more likely to be within range of the frame pointer.
     // If it's wrong, the scavenger will still enable access to work, it just
     // won't be optimal.
-    if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+    if (AFI->isThumb2Function() && MFI.getLocalFrameSize() < 128)
       return false;
     return true;
   }
@@ -389,10 +394,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
 
 bool ARMBaseRegisterInfo::
 cannotEliminateFrame(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
     return true;
-  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()
     || needsStackRealignment(MF);
 }
 
@@ -536,7 +541,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // so it'll be negative.
   MachineFunction &MF = *MI->getParent()->getParent();
   const ARMFrameLowering *TFI = getFrameLowering(MF);
-  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
   // Estimate an offset from the frame pointer.
@@ -551,7 +556,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // The incoming offset is relating to the SP at the start of the function,
   // but when we access the local it'll be relative to the SP after local
   // allocation, so adjust our SP-relative offset by that allocation size.
-  Offset += MFI->getLocalFrameSize();
+  Offset += MFI.getLocalFrameSize();
   // Assume that we'll have at least some spill slots allocated.
   // FIXME: This is a total SWAG number. We should run some statistics
   // and pick a real one.
@@ -563,7 +568,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // on whether there are any local variables that would trigger it.
   unsigned StackAlign = TFI->getStackAlignment();
   if (TFI->hasFP(MF) &&
-      !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+      !((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
    if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
      return false;
   }
@@ -572,7 +577,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // to only disallow SP relative references in the live range of
   // the VLA(s). In practice, it's unclear how much difference that
   // would make, but it may be worth doing.
-  if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
+  if (!MFI.hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
     return false;
 
   // The offset likely isn't legal, we want to allocate a virtual base register.
@@ -730,7 +735,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       assert(TFI->hasReservedCallFrame(MF) &&
              "Cannot use SP to access the emergency spill slot in "
             "functions without a reserved call frame");
-      assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+      assert(!MF.getFrameInfo().hasVarSizedObjects() &&
             "Cannot use SP to access the emergency spill slot in "
            "functions with variable sized frame objects");
     }
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 1eee94857e05..330e1535e863 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -99,11 +99,12 @@ public:
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   const MCPhysReg *
-  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
   const uint32_t *getNoPreservedMask() const override;
   const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
+  const uint32_t *getSjLjDispatchPreservedMask(const MachineFunction &MF) const;
 
   /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
   /// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
new file mode 100644
index 000000000000..780544f865df
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -0,0 +1,110 @@
+//===-- ARMBasicBlockInfo.h - Basic Block Information -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions and data structure for computing block size.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+  if (KnownBits < LogAlign)
+    return (1u << LogAlign) - (1u << KnownBits);
+  return 0;
+}
+
+/// BasicBlockInfo - Information about the offset and size of a single
+/// basic block.
+struct BasicBlockInfo {
+  /// Offset - Distance from the beginning of the function to the beginning
+  /// of this basic block.
+  ///
+  /// Offsets are computed assuming worst case padding before an aligned
+  /// block. This means that subtracting basic block offsets always gives a
+  /// conservative estimate of the real distance which may be smaller.
+  ///
+  /// Because worst case padding is used, the computed offset of an aligned
+  /// block may not actually be aligned.
+  unsigned Offset;
+
+  /// Size - Size of the basic block in bytes. If the block contains
+  /// inline assembly, this is a worst case estimate.
+  ///
+  /// The size does not include any alignment padding whether from the
+  /// beginning of the block, or from an aligned jump table at the end.
+  unsigned Size;
+
+  /// KnownBits - The number of low bits in Offset that are known to be
+  /// exact. The remaining bits of Offset are an upper bound.
+  uint8_t KnownBits;
+
+  /// Unalign - When non-zero, the block contains instructions (inline asm)
+  /// of unknown size. The real size may be smaller than Size bytes by a
+  /// multiple of 1 << Unalign.
+  uint8_t Unalign;
+
+  /// PostAlign - When non-zero, the block terminator contains a .align
+  /// directive, so the end of the block is aligned to 1 << PostAlign
+  /// bytes.
+  uint8_t PostAlign;
+
+  BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+    PostAlign(0) {}
+
+  /// Compute the number of known offset bits internally to this block.
+  /// This number should be used to predict worst case padding when
+  /// splitting the block.
+  unsigned internalKnownBits() const {
+    unsigned Bits = Unalign ? Unalign : KnownBits;
+    // If the block size isn't a multiple of the known bits, assume the
+    // worst case padding.
+    if (Size & ((1u << Bits) - 1))
+      Bits = countTrailingZeros(Size);
+    return Bits;
+  }
+
+  /// Compute the offset immediately following this block. If LogAlign is
+  /// specified, return the offset the successor block will get if it has
+  /// this alignment.
+  unsigned postOffset(unsigned LogAlign = 0) const {
+    unsigned PO = Offset + Size;
+    unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+    if (!LA)
+      return PO;
+    // Add alignment padding from the terminator.
+    return PO + UnknownPadding(LA, internalKnownBits());
+  }
+
+  /// Compute the number of known low bits of postOffset. If this block
+  /// contains inline asm, the number of known bits drops to the
+  /// instruction alignment. An aligned terminator may increase the number
+  /// of know bits.
+  /// If LogAlign is given, also consider the alignment of the next block.
+  unsigned postKnownBits(unsigned LogAlign = 0) const {
+    return std::max(std::max(unsigned(PostAlign), LogAlign),
+                    internalKnownBits());
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
new file mode 100644
index 000000000000..52c95b6244ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -0,0 +1,203 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARMCallLowering.h"
+
+#include "ARMBaseInstrInfo.h"
+#include "ARMISelLowering.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "This shouldn't be built without GISel"
+#endif
+
+ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
+    : CallLowering(&TLI) {}
+
+static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+                            Type *T) {
+  EVT VT = TLI.getValueType(DL, T);
+  if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+    return false;
+
+  unsigned VTSize = VT.getSimpleVT().getSizeInBits();
+  return VTSize == 8 || VTSize == 16 || VTSize == 32;
+}
+
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+  FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                    MachineInstrBuilder &MIB)
+      : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    llvm_unreachable("Don't know how to get a stack address yet");
+  }
+
+  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+                        CCValAssign &VA) override {
+    assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
+    assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+    assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+    assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+    assert(VA.getLocInfo() != CCValAssign::SExt &&
+           VA.getLocInfo() != CCValAssign::ZExt &&
+           "ABI extensions not supported yet");
+
+    MIRBuilder.buildCopy(PhysReg, ValVReg);
+    MIB.addUse(PhysReg, RegState::Implicit);
+  }
+
+  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    llvm_unreachable("Don't know how to assign a value to an address yet");
+  }
+
+  MachineInstrBuilder &MIB;
+};
+} // End anonymous namespace.
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p MIRBuilder's insertion point is correct.
+bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+                                     const Value *Val, unsigned VReg,
+                                     MachineInstrBuilder &Ret) const {
+  if (!Val)
+    // Nothing to do here.
+    return true;
+
+  auto &MF = MIRBuilder.getMF();
+  const auto &F = *MF.getFunction();
+
+  auto DL = MF.getDataLayout();
+  auto &TLI = *getTLI<ARMTargetLowering>();
+  if (!isSupportedType(DL, TLI, Val->getType()))
+    return false;
+
+  CCAssignFn *AssignFn =
+      TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
+
+  ArgInfo RetInfo(VReg, Val->getType());
+  setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
+
+  FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
+  return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+}
+
+bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+                                  const Value *Val, unsigned VReg) const {
+  assert(!Val == !VReg && "Return value without a vreg");
+
+  auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+
+  if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+    return false;
+
+  MIRBuilder.insertInstr(Ret);
+  return true;
+}
+
+namespace {
+struct FormalArgHandler : public CallLowering::ValueHandler {
+  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+      : ValueHandler(MIRBuilder, MRI) {}
+
+  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    assert(Size == 4 && "Unsupported size");
+
+    auto &MFI = MIRBuilder.getMF().getFrameInfo();
+
+    int FI = MFI.CreateFixedObject(Size, Offset, true);
+    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+
+    unsigned AddrReg =
+        MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32));
+    MIRBuilder.buildFrameIndex(AddrReg, FI);
+
+    return AddrReg;
+  }
+
+  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    assert(Size == 4 && "Unsupported size");
+
+    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+        MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
+    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+  }
+
+  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+                        CCValAssign &VA) override {
+    assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
+    assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+    assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+    assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+    MIRBuilder.getMBB().addLiveIn(PhysReg);
+    MIRBuilder.buildCopy(ValVReg, PhysReg);
+  }
+};
+} // End anonymous namespace
+
+bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+                                           const Function &F,
+                                           ArrayRef<unsigned> VRegs) const {
+  // Quick exit if there aren't any args
+  if (F.arg_empty())
+    return true;
+
+  if (F.isVarArg())
+    return false;
+
+  auto DL = MIRBuilder.getMF().getDataLayout();
+  auto &TLI = *getTLI<ARMTargetLowering>();
+
+  auto &Args = F.getArgumentList();
+  unsigned ArgIdx = 0;
+  for (auto &Arg : Args) {
+    ArgIdx++;
+    if (!isSupportedType(DL, TLI, Arg.getType()))
+      return false;
+
+    // FIXME: This check as well as ArgIdx are going away as soon as we support
+    // loading values < 32 bits.
+    if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+      return false;
+  }
+
+  CCAssignFn *AssignFn =
+      TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
+
+  SmallVector<ArgInfo, 8> ArgInfos;
+  unsigned Idx = 0;
+  for (auto &Arg : Args) {
+    ArgInfo AInfo(VRegs[Idx], Arg.getType());
+    setArgFlags(AInfo, Idx + 1, DL, F);
+    ArgInfos.push_back(AInfo);
+    Idx++;
+  }
+
+  FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
+  return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
new file mode 100644
index 000000000000..6a1b886b501f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -0,0 +1,42 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+#define LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class ARMTargetLowering;
+class MachineInstrBuilder;
+
+class ARMCallLowering : public CallLowering {
+public:
+  ARMCallLowering(const ARMTargetLowering &TLI);
+
+  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
+                   unsigned VReg) const override;
+
+  bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+                            ArrayRef<unsigned> VRegs) const override;
+
+private:
+  bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
+                      unsigned VReg, MachineInstrBuilder &Ret) const;
+};
+} // End of namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index edb69581b9d3..9c278a52a7ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -242,6 +242,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
 //===----------------------------------------------------------------------===//
 
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
+def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
 
 def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
                                      (sequence "D%u", 15, 8))>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
new file mode 100644
index 000000000000..64f187d17e64
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -0,0 +1,72 @@
+//===--- ARMComputeBlockSize.cpp - Compute machine block sizes ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBasicBlockInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+static bool +mayOptimizeThumb2Instruction(const MachineInstr *MI) { + switch(MI->getOpcode()) { + // optimizeThumb2Instructions. + case ARM::t2LEApcrel: + case ARM::t2LDRpci: + // optimizeThumb2Branches. + case ARM::t2B: + case ARM::t2Bcc: + case ARM::tBcc: + // optimizeThumb2JumpTables. + case ARM::t2BR_JT: + return true; + } + return false; +} + +void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB, + BasicBlockInfo &BBI) { + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo()); + bool isThumb = MF->getInfo<ARMFunctionInfo>()->isThumbFunction(); + BBI.Size = 0; + BBI.Unalign = 0; + BBI.PostAlign = 0; + + for (MachineInstr &I : *MBB) { + BBI.Size += TII->getInstSizeInBytes(I); + // For inline asm, getInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I.isInlineAsm()) + BBI.Unalign = isThumb ? 1 : 2; + // Also consider instructions that may be shrunk later. + else if (isThumb && mayOptimizeThumb2Instruction(&I)) + BBI.Unalign = 1; + } + + // tBR_JTr contains a .align 2 directive. + if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { + BBI.PostAlign = 2; + MBB->getParent()->ensureAlignment(2); + } +} + +std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) { + std::vector<BasicBlockInfo> BBInfo; + BBInfo.resize(MF->getNumBlockIDs()); + + for (MachineBasicBlock &MBB : *MF) + computeBlockSize(MF, &MBB, BBInfo[MBB.getNumber()]); + + return BBInfo; +} + +} // end namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 8511f67dccd5..be1a37e3e362 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBasicBlockInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "Thumb2InstrInfo.h" @@ -57,18 +58,10 @@ static cl::opt<unsigned> CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30), cl::desc("The max number of iteration for converge")); - -/// UnknownPadding - Return the worst case padding that could result from -/// unknown offset bits. This does not include alignment padding caused by -/// known offset bits. -/// -/// @param LogAlign log2(alignment) -/// @param KnownBits Number of known low offset bits. -static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); - return 0; -} +static cl::opt<bool> SynthesizeThumb1TBB( + "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true), + cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an " + "equivalent to the TBB/TBH instructions")); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -83,78 +76,6 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - /// BasicBlockInfo - Information about the offset and size of a single - /// basic block. - struct BasicBlockInfo { - /// Offset - Distance from the beginning of the function to the beginning - /// of this basic block. - /// - /// Offsets are computed assuming worst case padding before an aligned - /// block. 
This means that subtracting basic block offsets always gives a - /// conservative estimate of the real distance which may be smaller. - /// - /// Because worst case padding is used, the computed offset of an aligned - /// block may not actually be aligned. - unsigned Offset; - - /// Size - Size of the basic block in bytes. If the block contains - /// inline assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - /// KnownBits - The number of low bits in Offset that are known to be - /// exact. The remaining bits of Offset are an upper bound. - uint8_t KnownBits; - - /// Unalign - When non-zero, the block contains instructions (inline asm) - /// of unknown size. The real size may be smaller than Size bytes by a - /// multiple of 1 << Unalign. - uint8_t Unalign; - - /// PostAlign - When non-zero, the block terminator contains a .align - /// directive, so the end of the block is aligned to 1 << PostAlign - /// bytes. - uint8_t PostAlign; - - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), - PostAlign(0) {} - - /// Compute the number of known offset bits internally to this block. - /// This number should be used to predict worst case padding when - /// splitting the block. - unsigned internalKnownBits() const { - unsigned Bits = Unalign ? Unalign : KnownBits; - // If the block size isn't a multiple of the known bits, assume the - // worst case padding. - if (Size & ((1u << Bits) - 1)) - Bits = countTrailingZeros(Size); - return Bits; - } - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - unsigned LA = std::max(unsigned(PostAlign), LogAlign); - if (!LA) - return PO; - // Add alignment padding from the terminator. - return PO + UnknownPadding(LA, internalKnownBits()); - } - - /// Compute the number of known low bits of postOffset. If this block - /// contains inline asm, the number of known bits drops to the - /// instruction alignment. An aligned terminator may increase the number - /// of know bits. - /// If LogAlign is given, also consider the alignment of the next block. 
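The padding arithmetic deleted across this hunk is not gone: BasicBlockInfo and the UnknownPadding helper move into the new ARMBasicBlockInfo.h header included at the top of the file. The core formula, as a compilable sketch with the same names as the deleted helper:

    #include <iostream>

    // Worst-case padding when only the low KnownBits of an offset are exact
    // and the following block wants 1 << LogAlign alignment.
    static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
      if (KnownBits < LogAlign)
        return (1u << LogAlign) - (1u << KnownBits);
      return 0;
    }

    int main() {
      // A block whose end offset is only known modulo 2 (KnownBits = 1),
      // followed by a 4-byte-aligned block (LogAlign = 2): up to 2 bytes of
      // padding may appear, so conservative layout must budget for them.
      std::cout << UnknownPadding(/*LogAlign=*/2, /*KnownBits=*/1) << '\n'; // 2
      std::cout << UnknownPadding(/*LogAlign=*/2, /*KnownBits=*/2) << '\n'; // 0
    }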
- unsigned postKnownBits(unsigned LogAlign = 0) const { - return std::max(std::max(unsigned(PostAlign), LogAlign), - internalKnownBits()); - } - }; std::vector<BasicBlockInfo> BBInfo; @@ -273,6 +194,7 @@ namespace { bool isThumb; bool isThumb1; bool isThumb2; + bool isPositionIndependentOrROPI; public: static char ID; ARMConstantIslands() : MachineFunctionPass(ID) {} @@ -281,10 +203,10 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { + StringRef getPassName() const override { return "ARM constant island placement and branch shortening pass"; } @@ -319,7 +241,6 @@ namespace { bool fixupConditionalBr(ImmBranch &Br); bool fixupUnconditionalBr(ImmBranch &Br); bool undoLRSpillRestore(); - bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); @@ -330,7 +251,6 @@ namespace { MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); - void computeBlockSize(MachineBasicBlock *MBB); unsigned getOffsetOf(MachineInstr *MI) const; unsigned getUserOffset(CPUser&) const; void dumpBBs(); @@ -405,6 +325,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget()); TII = STI->getInstrInfo(); + isPositionIndependentOrROPI = + STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); AFI = MF->getInfo<ARMFunctionInfo>(); isThumb = AFI->isThumbFunction(); @@ -412,6 +334,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isThumb2 = AFI->isThumb2Function(); HasFarJump = false; + bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); // This pass invalidates liveness information when it splits basic blocks. MF->getRegInfo().invalidateLiveness(); @@ -423,7 +346,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Try to reorder and otherwise adjust the block layout to make good use // of the TB[BH] instructions. bool MadeChange = false; - if (isThumb2 && AdjustJumpTableBlocks) { + if (GenerateTBB && AdjustJumpTableBlocks) { scanFunctionJumpTables(); MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. @@ -500,7 +423,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MadeChange |= optimizeThumb2Branches(); // Optimize jump tables using TBB / TBH. - if (isThumb2) + if (GenerateTBB && !STI->genExecuteOnly()) MadeChange |= optimizeThumb2JumpTables(); // After a while, this might be made debug-only, but it is not expensive. @@ -626,9 +549,11 @@ void ARMConstantIslands::doInitialJumpTablePlacement( case ARM::t2BR_JT: JTOpcode = ARM::JUMPTABLE_INSTS; break; + case ARM::tTBB_JT: case ARM::t2TBB_JT: JTOpcode = ARM::JUMPTABLE_TBB; break; + case ARM::tTBH_JT: case ARM::t2TBH_JT: JTOpcode = ARM::JUMPTABLE_TBH; break; @@ -668,7 +593,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { return false; MachineBasicBlock *NextBB = &*std::next(MBBI); - if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) + if (!MBB->isSuccessor(NextBB)) return false; // Try to analyze the end of the block. 
A potential fallthrough may already @@ -701,8 +626,9 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { case ARM::CONSTPOOL_ENTRY: break; case ARM::JUMPTABLE_TBB: - return 0; + return isThumb1 ? 2 : 0; case ARM::JUMPTABLE_TBH: + return isThumb1 ? 2 : 1; case ARM::JUMPTABLE_INSTS: return 1; case ARM::JUMPTABLE_ADDRS: @@ -724,7 +650,8 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { void ARMConstantIslands::scanFunctionJumpTables() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &I : MBB) - if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT) + if (I.isBranch() && + (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr)) T2JumpTables.push_back(&I); } } @@ -734,15 +661,8 @@ void ARMConstantIslands::scanFunctionJumpTables() { /// and finding all of the constant pool users. void ARMConstantIslands:: initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { - BBInfo.clear(); - BBInfo.resize(MF->getNumBlockIDs()); - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineBasicBlock &MBB : *MF) - computeBlockSize(&MBB); + BBInfo = computeAllBlockSizes(MF); // The known bits of the entry block offset are determined by the function // alignment. @@ -772,12 +692,13 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { default: continue; // Ignore other JT branches case ARM::t2BR_JT: + case ARM::tBR_JTr: T2JumpTables.push_back(&I); continue; // Does not get an entry in ImmBranches case ARM::Bcc: isCond = true; UOpc = ARM::B; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::B: Bits = 24; Scale = 4; @@ -860,6 +781,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { case ARM::LDRi12: case ARM::LDRcp: case ARM::t2LDRpci: + case ARM::t2LDRHpci: Bits = 12; // +-offset_12 NegOk = true; break; @@ -875,6 +797,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { Scale = 4; // +-(offset_8*4) NegOk = true; break; + + case ARM::tLDRHi: + Bits = 5; + Scale = 2; // +(offset_5*2) + break; } // Remember that this is a user of a CP entry. @@ -901,32 +828,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { } } -/// computeBlockSize - Compute the size and some alignment information for MBB. -/// This function updates BBInfo directly. -void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { - BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; - BBI.Size = 0; - BBI.Unalign = 0; - BBI.PostAlign = 0; - - for (MachineInstr &I : *MBB) { - BBI.Size += TII->GetInstSizeInBytes(I); - // For inline asm, GetInstSizeInBytes returns a conservative estimate. - // The actual size may be smaller, but still a multiple of the instr size. - if (I.isInlineAsm()) - BBI.Unalign = isThumb ? 1 : 2; - // Also consider instructions that may be shrunk later. - else if (isThumb && mayOptimizeThumb2Instruction(&I)) - BBI.Unalign = 1; - } - - // tBR_JTr contains a .align 2 directive. - if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { - BBI.PostAlign = 2; - MBB->getParent()->ensureAlignment(2); - } -} - /// getOffsetOf - Return the current offset of the specified machine instruction /// from the start of the function. This offset changes as stuff is moved /// around inside the function. 
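getOffsetOf, whose body follows, is a prefix sum: the block's cached offset plus the sizes of the instructions preceding MI. A standalone sketch, with plain integers standing in for TII->getInstSizeInBytes (which this patch renames from GetInstSizeInBytes):

    #include <iostream>
    #include <vector>

    // Offset of the instruction at InstIndex: the block's offset plus the
    // sizes of everything before it in that block.
    static unsigned offsetOf(unsigned BlockOffset,
                             const std::vector<unsigned> &InstSizes,
                             unsigned InstIndex) {
      unsigned Offset = BlockOffset;
      for (unsigned I = 0; I != InstIndex; ++I)
        Offset += InstSizes[I];
      return Offset;
    }

    int main() {
      // A block at 0x40 holding a 4-byte, a 2-byte and a 4-byte instruction:
      std::vector<unsigned> Sizes = {4, 2, 4};
      std::cout << std::hex << offsetOf(0x40, Sizes, 2) << '\n'; // 0x46
    }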
@@ -941,7 +842,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { // Sum instructions before MI in MBB. for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(*I); + Offset += TII->getInstSizeInBytes(*I); } return Offset; } @@ -1034,11 +935,11 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - computeBlockSize(OrigBB); + computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - computeBlockSize(NewBB); + computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]); // All BBOffsets following these blocks must be modified. adjustBBOffsetsAfter(OrigBB); @@ -1400,7 +1301,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); - computeBlockSize(UserMBB); + computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]); adjustBBOffsetsAfter(UserMBB); return; } @@ -1449,7 +1350,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // iterates at least once. BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, - UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1); + UserOffset + TII->getInstSizeInBytes(*UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1459,9 +1360,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); MachineInstr *LastIT = nullptr; - for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI); + for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI); Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) { + Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) { assert(MI != UserMBB->end() && "Fell off end of block"); if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) { CPUser &U = CPUsers[CPUIndex]; @@ -1551,7 +1452,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // it. Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. MachineBasicBlock *WaterBB = &*--NewMBB->getIterator(); - IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); + IP = find(WaterList, WaterBB); if (IP != WaterList.end()) NewWaterList.erase(WaterBB); @@ -1762,7 +1663,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. 
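The rewrite performed by fixupConditionalBr just below is classic branch relaxation: when "bcc L" can no longer reach L, it becomes an inverted condition branch over an unconditional branch with a larger range. A toy model of the decision (encodings simplified; the displacement is invented):

    #include <cstdint>
    #include <iostream>

    // Does a signed byte displacement fit in a Bits-bit immediate field?
    static bool fits(int64_t Disp, unsigned Bits) {
      int64_t Limit = int64_t(1) << (Bits - 1);
      return Disp >= -Limit && Disp < Limit;
    }

    int main() {
      int64_t Disp = 5000;          // bytes from the branch to its target L
      if (fits(Disp, /*Bits=*/9))   // tBcc: 8-bit offset scaled by 2
        std::cout << "bcc L\n";
      else
        std::cout << "bcs next\n"   // inverted condition, short hop
                  << "b   L\n"      // tB has a much larger range
                  << "next:\n";
    }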
- int delta = TII->GetInstSizeInBytes(MBB->back()); + int delta = TII->getInstSizeInBytes(MBB->back()); BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below @@ -1778,18 +1679,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI); MI->eraseFromParent(); adjustBBOffsetsAfter(MBB); return true; @@ -1817,25 +1718,6 @@ bool ARMConstantIslands::undoLRSpillRestore() { return MadeChange; } -// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions -// below may shrink MI. -bool -ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { - switch(MI->getOpcode()) { - // optimizeThumb2Instructions. - case ARM::t2LEApcrel: - case ARM::t2LDRpci: - // optimizeThumb2Branches. - case ARM::t2B: - case ARM::t2Bcc: - case ARM::tBcc: - // optimizeThumb2JumpTables. - case ARM::t2BR_JT: - return true; - } - return false; -} - bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; @@ -2075,7 +1957,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, if (RemovableAdd) { RemovableAdd->eraseFromParent(); - DeadSize += 4; + DeadSize += isThumb2 ? 4 : 2; } else if (BaseReg == EntryReg) { // The add wasn't removable, but clobbered the base for the TBB. So we can't // preserve it. @@ -2142,25 +2024,82 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { if (!ByteOk && !HalfWordOk) continue; + CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; MachineBasicBlock *MBB = MI->getParent(); if (!MI->getOperand(0).isKill()) // FIXME: needed now? 
continue; - unsigned IdxReg = MI->getOperand(1).getReg(); - bool IdxRegKill = MI->getOperand(1).isKill(); - CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; unsigned DeadSize = 0; bool CanDeleteLEA = false; bool BaseRegKill = false; - bool PreservedBaseReg = + + unsigned IdxReg = ~0U; + bool IdxRegKill = true; + if (isThumb2) { + IdxReg = MI->getOperand(1).getReg(); + IdxRegKill = MI->getOperand(1).isKill(); + + bool PreservedBaseReg = preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); + if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) + continue; + } else { + // We're in thumb-1 mode, so we must have something like: + // %idx = tLSLri %idx, 2 + // %base = tLEApcrelJT + // %t = tLDRr %idx, %base + unsigned BaseReg = User.MI->getOperand(0).getReg(); + + if (User.MI->getIterator() == User.MI->getParent()->begin()) + continue; + MachineInstr *Shift = User.MI->getPrevNode(); + if (Shift->getOpcode() != ARM::tLSLri || + Shift->getOperand(3).getImm() != 2 || + !Shift->getOperand(2).isKill()) + continue; + IdxReg = Shift->getOperand(2).getReg(); + unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); - if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) - continue; + MachineInstr *Load = User.MI->getNextNode(); + if (Load->getOpcode() != ARM::tLDRr) + continue; + if (Load->getOperand(1).getReg() != ShiftedIdxReg || + Load->getOperand(2).getReg() != BaseReg || + !Load->getOperand(1).isKill()) + continue; + // If we're in PIC mode, there should be another ADD following. + if (isPositionIndependentOrROPI) { + MachineInstr *Add = Load->getNextNode(); + if (Add->getOpcode() != ARM::tADDrr || + Add->getOperand(2).getReg() != Load->getOperand(0).getReg() || + Add->getOperand(3).getReg() != BaseReg || + !Add->getOperand(2).isKill()) + continue; + if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg()) + continue; + + Add->eraseFromParent(); + DeadSize += 2; + } else { + if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg()) + continue; + } + + + // Now safe to delete the load and lsl. The LEA will be removed later. + CanDeleteLEA = true; + Shift->eraseFromParent(); + Load->eraseFromParent(); + DeadSize += 4; + } + DEBUG(dbgs() << "Shrink JT: " << *MI); MachineInstr *CPEMI = User.CPEMI; unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + if (!isThumb2) + Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT; + MachineBasicBlock::iterator MI_JT = MI; MachineInstr *NewJTMI = BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) @@ -2180,7 +2119,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { if (CanDeleteLEA) { User.MI->eraseFromParent(); - DeadSize += 4; + DeadSize += isThumb2 ? 4 : 2; // The LEA was eliminated, the TBB instruction becomes the only new user // of the jump table. @@ -2194,16 +2133,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // record the TBB or TBH use. 
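The ByteOk/HalfWordOk flags tested near the top of this function decide which compressed form fits: TBB stores byte-sized and TBH halfword-sized table entries, each holding half the forward displacement from the table to the case block, so the reachable ranges are 2*255 and 2*65535 bytes. A toy version of the check (displacements invented for illustration):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<uint32_t> Disps = {12, 96, 340, 1020}; // table -> target
      bool ByteOk = true, HalfWordOk = true;
      for (uint32_t D : Disps) {
        if (D / 2 > 0xff)   ByteOk = false;   // entry won't fit in a byte
        if (D / 2 > 0xffff) HalfWordOk = false;
      }
      std::cout << (ByteOk       ? "use TBB\n"
                    : HalfWordOk ? "use TBH\n"
                                 : "keep full jump table\n");
    }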
int CPEntryIdx = JumpTableEntryIndices[JTI]; auto &CPEs = CPEntries[CPEntryIdx]; - auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) { - return E.CPEMI == User.CPEMI; - }); + auto Entry = + find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; }); ++Entry->RefCount; CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false)); } } - unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(*MI); + unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI); + unsigned OrigSize = TII->getInstSizeInBytes(*MI); MI->eraseFromParent(); int Delta = OrigSize - NewSize + DeadSize; @@ -2297,9 +2235,16 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. - assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); - BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB) - .addImm(ARMCC::AL).addReg(0); + if (isThumb2) + BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)) + .addMBB(BB) + .addImm(ARMCC::AL) + .addReg(0); + else + BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB)) + .addMBB(BB) + .addImm(ARMCC::AL) + .addReg(0); // Update internal data structures to account for the newly inserted MBB. MF->RenumberBlocks(NewBB); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index c0db001cb6f1..2d1602873ce0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -46,7 +46,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMConstantPoolValue::~ARMConstantPoolValue() {} -const char *ARMConstantPoolValue::getModifierText() const { +StringRef ARMConstantPoolValue::getModifierText() const { switch (Modifier) { // FIXME: Are these case sensitive? It'd be nice to lower-case all the // strings if that's legal. 
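A recurring mechanical change in this patch (here for the CPE lookup, earlier for the water list and the SP-adjust records) replaces std::find/std::find_if over explicit begin()/end() pairs with LLVM's range-based helpers. A minimal sketch of the idiom:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Range-based wrapper, in the spirit of llvm::find_if.
    template <typename R, typename Pred>
    auto find_if(R &&Range, Pred P) {
      return std::find_if(std::begin(Range), std::end(Range), P);
    }

    int main() {
      std::vector<int> CPEs = {3, 7, 42, 9};
      auto It = find_if(CPEs, [](int E) { return E == 42; });
      if (It != CPEs.end())
        std::cout << "found at index " << (It - CPEs.begin()) << '\n';
    }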
@@ -60,6 +60,8 @@ const char *ARMConstantPoolValue::getModifierText() const { return "gottpoff"; case ARMCP::TPOFF: return "tpoff"; + case ARMCP::SBREL: + return "SBREL"; case ARMCP::SECREL: return "secrel32"; } @@ -129,6 +131,12 @@ ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C, AddCurrentAddress), CVal(C) {} +ARMConstantPoolConstant::ARMConstantPoolConstant(const GlobalVariable *GV, + const Constant *C) + : ARMConstantPoolValue((Type *)C->getType(), 0, ARMCP::CPPromotedGlobal, 0, + ARMCP::no_modifier, false), + CVal(C), GVar(GV) {} + ARMConstantPoolConstant * ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) { return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0, @@ -136,6 +144,12 @@ ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) { } ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const GlobalVariable *GVar, + const Constant *Initializer) { + return new ARMConstantPoolConstant(GVar, Initializer); +} + +ARMConstantPoolConstant * ARMConstantPoolConstant::Create(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier) { return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()), @@ -191,18 +205,17 @@ void ARMConstantPoolConstant::print(raw_ostream &O) const { // ARMConstantPoolSymbol //===----------------------------------------------------------------------===// -ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, - unsigned id, - unsigned char PCAdj, +ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, StringRef s, + unsigned id, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress) - : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, - AddCurrentAddress), - S(s) {} + : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, + AddCurrentAddress), + S(s) {} -ARMConstantPoolSymbol * -ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, - unsigned ID, unsigned char PCAdj) { +ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C, + StringRef s, unsigned ID, + unsigned char PCAdj) { return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); } diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index c07331d71dad..5f61832aa740 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -24,6 +24,7 @@ namespace llvm { class BlockAddress; class Constant; class GlobalValue; +class GlobalVariable; class LLVMContext; class MachineBasicBlock; @@ -33,7 +34,8 @@ namespace ARMCP { CPExtSymbol, CPBlockAddress, CPLSDA, - CPMachineBasicBlock + CPMachineBasicBlock, + CPPromotedGlobal }; enum ARMCPModifier { @@ -43,6 +45,7 @@ namespace ARMCP { GOTTPOFF, /// Global Offset Table, Thread Pointer Offset TPOFF, /// Thread Pointer Offset SECREL, /// Section Relative (Windows TLS) + SBREL, /// Static Base Relative (RWPI) }; } @@ -89,7 +92,7 @@ public: ~ARMConstantPoolValue() override; ARMCP::ARMCPModifier getModifier() const { return Modifier; } - const char *getModifierText() const; + StringRef getModifierText() const; bool hasModifier() const { return Modifier != ARMCP::no_modifier; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } @@ -102,7 +105,8 @@ public: bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } - + bool 
isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; } + int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; @@ -132,6 +136,7 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { /// Functions, and BlockAddresses. class ARMConstantPoolConstant : public ARMConstantPoolValue { const Constant *CVal; // Constant being loaded. + const GlobalVariable *GVar = nullptr; ARMConstantPoolConstant(const Constant *C, unsigned ID, @@ -145,11 +150,14 @@ class ARMConstantPoolConstant : public ARMConstantPoolValue { unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); + ARMConstantPoolConstant(const GlobalVariable *GV, const Constant *Init); public: static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID); static ARMConstantPoolConstant *Create(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier); + static ARMConstantPoolConstant *Create(const GlobalVariable *GV, + const Constant *Initializer); static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID, ARMCP::ARMCPKind Kind, unsigned char PCAdj); @@ -161,6 +169,12 @@ public: const GlobalValue *getGV() const; const BlockAddress *getBlockAddress() const; + const GlobalVariable *getPromotedGlobal() const { + return dyn_cast_or_null<GlobalVariable>(GVar); + } + const Constant *getPromotedGlobalInit() const { + return CVal; + } int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; @@ -173,7 +187,8 @@ public: void print(raw_ostream &O) const override; static bool classof(const ARMConstantPoolValue *APV) { - return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); + return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() || + APV->isPromotedGlobal(); } bool equals(const ARMConstantPoolConstant *A) const { @@ -186,15 +201,15 @@ public: class ARMConstantPoolSymbol : public ARMConstantPoolValue { const std::string S; // ExtSymbol being loaded. 
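The const char* to StringRef migration in this class follows a simple ownership split: the pool entry keeps owning the bytes in a std::string and hands out a non-owning view, so callers never need c_str(). A sketch with std::string_view standing in for llvm::StringRef:

    #include <iostream>
    #include <string>
    #include <string_view>

    class Symbol {
      const std::string S; // owning storage, as in ARMConstantPoolSymbol
    public:
      explicit Symbol(std::string_view Name) : S(Name) {}
      // Cheap, non-owning view; valid as long as the Symbol lives.
      std::string_view getSymbol() const { return S; }
    };

    int main() {
      Symbol Sym("__aeabi_uidiv");
      std::cout << Sym.getSymbol() << '\n';
    }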
- ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, + ARMConstantPoolSymbol(LLVMContext &C, StringRef s, unsigned id, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); public: - static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, - unsigned ID, unsigned char PCAdj); + static ARMConstantPoolSymbol *Create(LLVMContext &C, StringRef s, unsigned ID, + unsigned char PCAdj); - const char *getSymbol() const { return S.c_str(); } + StringRef getSymbol() const { return S; } int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a7b299677c1c..95fcc8dcb453 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -53,10 +53,10 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { + StringRef getPassName() const override { return "ARM pseudo instruction expansion pass"; } @@ -657,6 +657,9 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { return true; case MachineOperand::MO_CFIIndex: return false; + case MachineOperand::MO_IntrinsicID: + case MachineOperand::MO_Predicate: + llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); } @@ -1175,8 +1178,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } // If there's dynamic realignment, adjust for it. if (RI.needsStackRealignment(MF)) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI.getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); // Emit bic r6, r6, MaxAlign assert(MaxAlign <= 256 && "The BIC instruction cannot encode " diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 13724da5d4f7..df4dcb375750 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -112,11 +112,6 @@ class ARMFastISel final : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned fastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm); unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); @@ -351,36 +346,6 @@ unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - // Make sure the input operands are sufficiently constrained to be legal - // for this instruction. 
- Op0 = constrainOperandRegClass(II, Op0, 1); - Op1 = constrainOperandRegClass(II, Op1, 2); - if (II.getNumDefs() >= 1) { - AddOptionalDefs( - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm)); - } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(II.ImplicitDefs[0])); - } - return ResultReg; -} - unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { @@ -546,6 +511,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32 || GV->isThreadLocal()) return 0; + // ROPI/RWPI not currently supported. + if (Subtarget->isROPI() || Subtarget->isRWPI()) + return 0; + bool IsIndirect = Subtarget->isGVIndirectSymbol(GV); const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; @@ -764,7 +733,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); @@ -1071,7 +1040,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); SrcReg = Res; - } // Fallthrough here. + LLVM_FALLTHROUGH; + } case MVT::i8: if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) @@ -1844,7 +1814,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, // For AAPCS ABI targets, just use VFP variant of the calling convention. return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); } - // Fallthrough + LLVM_FALLTHROUGH; case CallingConv::C: case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. @@ -1863,6 +1833,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); // Fall through to soft float variant, variadic functions don't // use hard floating point ABI. + LLVM_FALLTHROUGH; case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); case CallingConv::ARM_APCS: @@ -2481,8 +2452,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { switch (I.getIntrinsicID()) { default: return false; case Intrinsic::frameaddress: { - MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); - MFI->setFrameAddressIsTaken(true); + MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); + MFI.setFrameAddressIsTaken(true); unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; const TargetRegisterClass *RC = isThumb2 ? 
&ARM::tGPRRegClass diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index e8c9f610ea64..c72db8aca108 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -30,6 +30,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-frame-lowering" + using namespace llvm; static cl::opt<bool> @@ -57,18 +59,16 @@ bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); - // iOS requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetIOS() || STI.isTargetWatchOS()) + // ABI-required frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Always eliminate non-leaf frame pointers. - return ((MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->hasCalls()) || - RegInfo->needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); + // Frame pointer required for use within this function. + return (RegInfo->needsStackRealignment(MF) || + MFI.hasVarSizedObjects() || + MFI.isFrameAddressTaken()); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -77,8 +77,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { /// add/sub sp brackets around call sites. Returns true if the call frame is /// included as part of the stack frame. bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned CFSize = MFI.getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the // stack frame. ARM (especially Thumb) has small immediate offset to // address the stack frame. So a large call frame can cause poor codegen @@ -86,7 +86,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 return false; - return !MF.getFrameInfo()->hasVarSizedObjects(); + return !MFI.hasVarSizedObjects(); } /// canSimplifyCallFramePseudos - If there is a reserved call frame, the @@ -95,7 +95,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { /// even when FP is available in Thumb2 mode. bool ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { - return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); + return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects(); } static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII, @@ -169,9 +169,9 @@ static int sizeOfSPAdjustment(const MachineInstr &MI) { static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096; + unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 
4080 : 4096; if (F->hasFnAttribute("stack-probe-size")) F->getFnAttribute("stack-probe-size") .getValueAsString() @@ -196,22 +196,21 @@ struct StackAdjustingInsts { } void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { - auto Info = std::find_if(Insts.begin(), Insts.end(), - [&](InstInfo &Info) { return Info.I == I; }); + auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); assert(Info != Insts.end() && "invalid sp adjusting instruction"); Info->SPAdjust += ExtraBytes; } - void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, - const DebugLoc &dl, const ARMBaseInstrInfo &TII, - bool HasFP) { + void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl, + const ARMBaseInstrInfo &TII, bool HasFP) { + MachineFunction &MF = *MBB.getParent(); unsigned CFAOffset = 0; for (auto &Info : Insts) { if (HasFP && !Info.BeforeFPSet) return; CFAOffset -= Info.SPAdjust; - unsigned CFIIndex = MMI.addFrameInst( + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, std::next(Info.I), dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -288,7 +287,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); @@ -301,8 +300,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, bool isARM = !AFI->isThumbFunction(); unsigned Align = STI.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); - unsigned NumBytes = MFI->getStackSize(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + unsigned NumBytes = MFI.getStackSize(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -339,7 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes - ArgRegsSaveSize, true); } - DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); return; } @@ -353,11 +352,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } - // fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: @@ -396,8 +395,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInPush = 0; if (HasFP) { FramePtrOffsetInPush = - MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; + AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); @@ -414,7 +413,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // .cfi_offset operations will reflect that. 
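emitDefCFAOffsets, adjusted above to record frame instructions through the MachineFunction rather than MachineModuleInfo, replays the accumulated SP adjustments as .cfi_def_cfa_offset directives until the frame pointer takes over. A runnable sketch of that bookkeeping:

    #include <iostream>
    #include <vector>

    struct InstInfo {
      int SPAdjust;     // bytes this instruction subtracts from SP
      bool BeforeFPSet; // does it execute before the FP is established?
    };

    // Once HasFP and the FP is set, later SP moves need no new directives.
    static void emitDefCFAOffsets(const std::vector<InstInfo> &Insts,
                                  bool HasFP) {
      int CFAOffset = 0;
      for (const InstInfo &Info : Insts) {
        if (HasFP && !Info.BeforeFPSet)
          return;
        CFAOffset += Info.SPAdjust;
        std::cout << ".cfi_def_cfa_offset " << CFAOffset << '\n';
      }
    }

    int main() {
      // push {r4-r7,lr} (20 bytes), vpush {d8} (8 bytes), sub sp, #16:
      emitDefCFAOffsets({{20, true}, {8, true}, {16, false}}, /*HasFP=*/true);
    }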
if (DPRGapSize) { assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); - if (tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize)) + if (LastPush != MBB.end() && + tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize)) DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, @@ -440,7 +440,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // leaves the stack pointer pointing to the DPRCS2 area. // // Adjust NumBytes to represent the stack slots below the DPRCS2 area. - NumBytes += MFI->getObjectOffset(D8SpillFI); + NumBytes += MFI.getObjectOffset(D8SpillFI); } else NumBytes = DPRCSOffset; @@ -526,7 +526,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, PushSize + FramePtrOffsetInPush, MachineInstr::FrameSetup); if (FramePtrOffsetInPush + PushSize != 0) { - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), -(ArgRegsSaveSize - FramePtrOffsetInPush))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -534,7 +534,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -557,9 +557,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) + if (STI.splitFramePushPop(MF)) break; - // fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: @@ -569,8 +569,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R6: case ARM::R7: case ARM::LR: - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -590,10 +590,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop()) { + if (STI.splitFramePushPop(MF)) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned Offset = MFI->getObjectOffset(FI); - unsigned CFIIndex = MMI.addFrameInst( + unsigned Offset = MFI.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -614,8 +614,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, if ((Reg >= ARM::D0 && Reg <= ARM::D31) && (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned Offset = MFI->getObjectOffset(FI); - unsigned CFIIndex = MMI.addFrameInst( + unsigned Offset = MFI.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -628,11 +628,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction 
&MF, // throughout the process. If we have a frame pointer, it takes over the job // half-way through, so only the first few .cfi_def_cfa_offset instructions // actually get emitted. - DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); + MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); @@ -644,7 +644,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // If aligned NEON registers were spilled, the stack has already been // realigned. if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { - unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned MaxAlign = MFI.getMaxAlignment(); assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, @@ -688,13 +688,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. - if (MFI->hasVarSizedObjects()) + if (MFI.hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); } void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = @@ -704,7 +704,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, bool isARM = !AFI->isThumbFunction(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); - int NumBytes = (int)MFI->getStackSize(); + int NumBytes = (int)MFI.getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // All calls are tail calls in GHC calling conv, and functions have no @@ -753,7 +753,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. - assert(!MFI->getPristineRegs(MF).test(ARM::R4) && + assert(!MFI.getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); @@ -776,11 +776,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. - if (AFI->getDPRCalleeSavedAreaSize()) { + if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop // instructions in the epilogue. 
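The new std::sort calls in emitPushInst and emitPopInst put the collected callee-saved registers into encoding order before building the register list, since ARM register-list instructions store registers lowest-numbered first. A toy illustration, with a made-up encoding table standing in for TRI.getEncodingValue():

    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::map<std::string, unsigned> Enc = {
          {"r4", 4}, {"r5", 5}, {"r7", 7}, {"lr", 14}};
      std::vector<std::string> Regs = {"r7", "lr", "r4", "r5"};
      std::sort(Regs.begin(), Regs.end(),
                [&](const std::string &L, const std::string &R) {
                  return Enc[L] < Enc[R]; // sort by hardware encoding
                });
      for (const auto &R : Regs)
        std::cout << R << ' '; // r4 r5 r7 lr
      std::cout << '\n';
    }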
- while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } if (AFI->getDPRCalleeSavedGapSize()) { @@ -811,13 +811,13 @@ int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); - bool isFixed = MFI->isFixedObjectIndex(FI); + bool isFixed = MFI.isFixedObjectIndex(FI); FrameReg = ARM::SP; Offset += SPAdj; @@ -893,16 +893,18 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); DebugLoc DL; - SmallVector<std::pair<unsigned,bool>, 4> Regs; + typedef std::pair<unsigned, bool> RegAndKill; + SmallVector<RegAndKill, 4> Regs; unsigned i = CSI.size(); while (i != 0) { unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -927,6 +929,12 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Regs.empty()) continue; + + std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS, + const RegAndKill &RHS) { + return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first); + }); + if (Regs.size() > 1 || StrOpc== 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) @@ -960,6 +968,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; bool isTailCall = false; @@ -983,7 +992,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.splitFramePushPop())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1012,6 +1021,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Regs.empty()) continue; + + std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) { + return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS); + }); + if (Regs.size() > 1 || LdrOpc == 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) @@ -1062,7 +1076,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc() : DebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. Since MFI computes // stack slot layout backwards, this can actually mean that the d-reg stack @@ -1104,7 +1118,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, .addReg(ARM::SP) .addImm(8 * NumAlignedDPRCS2Regs))); - unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); + unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment(); // We must set parameter MustBeSingleInstruction to true, since // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform // stack alignment. Luckily, this can always be done since all ARM @@ -1359,7 +1373,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, unsigned FnSize = 0; for (auto &MBB : MF) { for (auto &MI : MBB) - FnSize += TII.GetInstSizeInBytes(MI); + FnSize += TII.getInstSizeInBytes(MI); } return FnSize; } @@ -1485,8 +1499,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + (void)TRI; // Silence unused warning in non-assert builds. unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1495,7 +1511,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // instruction. // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && - (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) + (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) SavedRegs.set(ARM::R4); if (AFI->isThumb1OnlyFunction()) { @@ -1509,8 +1525,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = MFI->estimateStackSize(MF); - if (MFI->hasVarSizedObjects() || StackSize > 508) + unsigned StackSize = MFI.estimateStackSize(MF); + if (MFI.hasVarSizedObjects() || StackSize > 508) SavedRegs.set(ARM::R4); } @@ -1547,7 +1563,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1558,7 +1574,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, switch (Reg) { case ARM::LR: LRSpilled = true; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: @@ -1569,7 +1585,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, break; } } else { - if (!STI.splitFramePushPop()) { + if (!STI.splitFramePushPop(MF)) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1616,7 +1632,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? 
unsigned EstimatedStackSize = - MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); if (hasFP(MF)) { if (AFI->hasStackFrame()) EstimatedStackSize += 4; @@ -1628,20 +1644,149 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedStackSize += 16; // For possible paddings. bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || - MFI->hasVarSizedObjects() || - (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); + if (hasFP(MF)) { + SavedRegs.set(FramePtr); + // If the frame pointer is required by the ABI, also spill LR so that we + // emit a complete frame record. + if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + SavedRegs.set(ARM::LR); + LRSpilled = true; + NumGPRSpills++; + auto LRPos = find(UnspilledCS1GPRs, ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + } + auto FPPos = find(UnspilledCS1GPRs, FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); + NumGPRSpills++; + if (FramePtr == ARM::R7) + CS1Spilled = true; + } + + if (AFI->isThumb1OnlyFunction()) { + // For Thumb1-only targets, we need some low registers when we save and + // restore the high registers (which aren't allocatable, but could be + // used by inline assembly) because the push/pop instructions can not + // access high registers. If necessary, we might need to push more low + // registers to ensure that there is at least one free that can be used + // for the saving & restoring, and preferably we should ensure that as + // many as are needed are available so that fewer push/pop instructions + // are required. + + // Low registers which are not currently pushed, but could be (r4-r7). + SmallVector<unsigned, 4> AvailableRegs; + + // Unused argument registers (r0-r3) can be clobbered in the prologue for + // free. + int EntryRegDeficit = 0; + for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) { + if (!MF.getRegInfo().isLiveIn(Reg)) { + --EntryRegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is unused argument register, EntryRegDeficit = " + << EntryRegDeficit << "\n"); + } + } + + // Unused return registers can be clobbered in the epilogue for free. + int ExitRegDeficit = AFI->getReturnRegsCount() - 4; + DEBUG(dbgs() << AFI->getReturnRegsCount() + << " return regs used, ExitRegDeficit = " << ExitRegDeficit + << "\n"); + + int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit); + DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n"); + + // r4-r6 can be used in the prologue if they are pushed by the first push + // instruction. + for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) { + if (SavedRegs.test(Reg)) { + --RegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is saved low register, RegDeficit = " << RegDeficit + << "\n"); + } else { + AvailableRegs.push_back(Reg); + DEBUG(dbgs() + << PrintReg(Reg, TRI) + << " is non-saved low register, adding to AvailableRegs\n"); + } + } + + // r7 can be used if it is not being used as the frame pointer. 
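The Thumb1 block being added here weighs how many scratch low registers are free at function entry versus exit, then pushes spare low registers so that saving r8-r11 (which the Thumb1 PUSH cannot touch directly) needs fewer copy/push rounds. A toy walk-through of the deficit arithmetic, with invented register counts:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    int main() {
      int UnusedArgRegs = 2;  // say r2, r3 unused at entry: free scratch
      int ReturnRegsUsed = 1; // one word returned in r0
      int EntryRegDeficit = -UnusedArgRegs;        // -2
      int ExitRegDeficit = ReturnRegsUsed - 4;     // -3
      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);

      int SavedHighRegs = 3;  // r8-r10 must be copied through low registers
      RegDeficit += SavedHighRegs;

      std::vector<const char *> AvailableRegs = {"r4", "r5", "r6"}; // unsaved
      while (RegDeficit > 0 && !AvailableRegs.empty()) {
        std::cout << "also push " << AvailableRegs.back() << '\n';
        AvailableRegs.pop_back();
        --RegDeficit;
      }
    }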
+ if (!hasFP(MF)) { + if (SavedRegs.test(ARM::R7)) { + --RegDeficit; + DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " + << RegDeficit << "\n"); + } else { + AvailableRegs.push_back(ARM::R7); + DEBUG(dbgs() + << "%R7 is non-saved low register, adding to AvailableRegs\n"); + } + } + + // Each of r8-r11 needs to be copied to a low register, then pushed. + for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) { + if (SavedRegs.test(Reg)) { + ++RegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is saved high register, RegDeficit = " << RegDeficit + << "\n"); + } + } + + // LR can only be used by PUSH, not POP, and can't be used at all if the + // llvm.returnaddress intrinsic is used. This is only worth doing if we + // are more limited at function entry than exit. + if ((EntryRegDeficit > ExitRegDeficit) && + !(MF.getRegInfo().isLiveIn(ARM::LR) && + MF.getFrameInfo().isReturnAddressTaken())) { + if (SavedRegs.test(ARM::LR)) { + --RegDeficit; + DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit + << "\n"); + } else { + AvailableRegs.push_back(ARM::LR); + DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n"); + } + } + + // If there are more high registers that need pushing than low registers + // available, push some more low registers so that we can use fewer push + // instructions. This might not reduce RegDeficit all the way to zero, + // because we can only guarantee that r4-r6 are available, but r8-r11 may + // need saving. + DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n"); + for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) { + unsigned Reg = AvailableRegs.pop_back_val(); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up reg deficit\n"); + SavedRegs.set(Reg); + NumGPRSpills++; + CS1Spilled = true; + ExtraCSSpill = true; + UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg)); + if (Reg == ARM::LR) + LRSpilled = true; + } + DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n"); + } + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled, // spill LR as well so we can fold BX_RET into the register restore (LDM). if (!LRSpilled && CS1Spilled) { SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; - LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), - (unsigned)ARM::LR); + LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); @@ -1649,18 +1794,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, ExtraCSSpill = true; } - if (hasFP(MF)) { - SavedRegs.set(FramePtr); - auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), - FramePtr); - if (FPPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(FPPos); - NumGPRSpills++; - } - // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas.
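A worked instance of the RegDeficit arithmetic above, on assumed figures (a Thumb1 function with one live i32 argument, one i32 return value, and r8/r9 needing a save), mirrors the bookkeeping exactly; the helper name is invented for illustration:

  #include <algorithm>

  int exampleRegDeficit() {
    int EntryRegDeficit = -3;   // --EntryRegDeficit per unused arg reg (r1-r3)
    int ExitRegDeficit = 1 - 4; // ReturnRegsCount - 4 == -3
    int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit); // == -3
    RegDeficit += 2;            // ++RegDeficit per saved high reg (r8, r9)
    // Still negative, so the "push extra low registers" loop never runs:
    // the unused argument registers already cover both high-reg copies.
    return RegDeficit;          // == -1
  }

With NumGPRSpills settled, the padding logic below rounds the GPR spill count so the D-register save area stays 8-byte aligned.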
+ DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n"); unsigned TargetAlign = getStackAlignment(); if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { @@ -1672,6 +1809,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, (STI.isTargetWindows() && Reg == ARM::R11) || isARMLowRegister(Reg) || Reg == ARM::LR) { SavedRegs.set(Reg); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1680,6 +1819,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); SavedRegs.set(Reg); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } @@ -1725,9 +1866,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // closest to SP or frame pointer. assert(RS && "Register scavenging not provided"); const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } } } @@ -1855,7 +1996,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); @@ -1864,7 +2005,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; - uint64_t StackSize = MFI->getStackSize(); + uint64_t StackSize = MFI.getStackSize(); // Do not generate a prologue for functions with a stack of size zero if (StackSize == 0) @@ -1951,14 +2092,14 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the relevant DWARF information about the change in stack pointer as // well as where to find both r4 and r5 (the callee-save registers) CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); @@ -2069,10 +2210,10 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Emit the DWARF info about the change in stack as well as where to find the // previous link register CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = 
MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); @@ -2124,7 +2265,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); @@ -2147,17 +2288,17 @@ void ARMFrameLowering::adjustForSegmentedStacks( } // Update the CFA offset now that we've popped - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Tell debuggers that r4 and r5 are now the same as they were in the // previous function, that they're the "Same Value". - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 20db3d39bcae..c3e9591d5c70 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -70,9 +70,7 @@ public: return true; } - const char *getPassName() const override { - return "ARM Instruction Selection"; - } + StringRef getPassName() const override { return "ARM Instruction Selection"; } void PreprocessISelDAG() override; @@ -193,6 +191,8 @@ public: #include "ARMGenDAGISel.inc" private: + void transferMemOperands(SDNode *Src, SDNode *Dst); + /// Indexed (pre/post inc/dec) load matching code for ARM. bool tryARMIndexedLoad(SDNode *N); bool tryT1IndexedLoad(SDNode *N); @@ -222,10 +222,11 @@ private: const uint16_t *QOpcodes); /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs - /// should be 2, 3 or 4. The opcode array specifies the instructions used + /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used /// for loading D registers. (Q registers are not supported.) void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes = nullptr); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. 
These are custom-selected so that a REG_SEQUENCE can be @@ -244,6 +245,7 @@ private: bool tryInlineAsm(SDNode *N); void SelectConcatVector(SDNode *N); + void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); bool trySMLAWSMULW(SDNode *N); @@ -476,7 +478,9 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { if (Subtarget->isThumb()) { if (Val <= 255) return 1; // MOV - if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW + if (Subtarget->hasV6T2Ops() && + (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1)) + return 1; // MOVW if (Val <= 510) return 2; // MOV + ADDi8 if (~Val <= 255) return 2; // MOV + MVN if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL @@ -1186,6 +1190,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && + N.getOperand(0).getOpcode() != ISD::TargetConstantPool && N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else { @@ -1232,9 +1237,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); // Only multiples of 4 are allowed for the offset, so the frame object // alignment must be at least 4. - MachineFrameInfo *MFI = MF->getFrameInfo(); - if (MFI->getObjectAlignment(FI) < 4) - MFI->setObjectAlignment(FI, 4); + MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.getObjectAlignment(FI) < 4) + MFI.setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); @@ -1255,9 +1260,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, int FI = cast<FrameIndexSDNode>(Base)->getIndex(); // For LHS+RHS to result in an offset that's a multiple of 4 the object // indexed by the LHS must be 4-byte aligned. 
- MachineFrameInfo *MFI = MF->getFrameInfo(); - if (MFI->getObjectAlignment(FI) < 4) - MFI->setObjectAlignment(FI, 4); + MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.getObjectAlignment(FI) < 4) + MFI.setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } @@ -1469,6 +1474,12 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); } +void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); +} + bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); @@ -1527,16 +1538,20 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, - MVT::i32, MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } } @@ -1548,8 +1563,8 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); ISD::MemIndexedMode AM = LD->getAddressingMode(); - if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD || - AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32) + if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || + LoadedVT.getSimpleVT().SimpleTy != MVT::i32) return false; auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); @@ -1564,8 +1579,10 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32, - MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, + MVT::i32, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } @@ -1610,8 +1627,10 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), CurDAG->getRegister(0, MVT::i32), Chain }; - ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, - MVT::Other, Ops)); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceNode(N, New); return true; } @@ -1744,6 +1763,12 @@ static bool isVLDfixed(unsigned Opc) case ARM::VLD1q16wb_fixed : return true; case ARM::VLD1q32wb_fixed : return true; case ARM::VLD1q64wb_fixed : return true; + case ARM::VLD1DUPd8wb_fixed : return true; + case ARM::VLD1DUPd16wb_fixed : 
return true; + case ARM::VLD1DUPd32wb_fixed : return true; + case ARM::VLD1DUPq8wb_fixed : return true; + case ARM::VLD1DUPq16wb_fixed : return true; + case ARM::VLD1DUPq32wb_fixed : return true; case ARM::VLD2d8wb_fixed : return true; case ARM::VLD2d16wb_fixed : return true; case ARM::VLD2d32wb_fixed : return true; @@ -1798,6 +1823,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; + case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; + case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; + case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; + case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; + case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; + case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -2140,7 +2171,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, unsigned Alignment = 0; if (NumVecs != 3) { Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); - unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; if (Alignment > NumBytes) Alignment = NumBytes; if (Alignment < 8 && Alignment < NumBytes) @@ -2238,8 +2269,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, } void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); SDLoc dl(N); SDValue MemAddr, Align; @@ -2255,7 +2287,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned Alignment = 0; if (NumVecs != 3) { Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); - unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; if (Alignment > NumBytes) Alignment = NumBytes; if (Alignment < 8 && Alignment < NumBytes) @@ -2267,19 +2299,21 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, } Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); - unsigned OpcodeIndex; + unsigned Opc; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld-dup type"); - case MVT::v8i8: OpcodeIndex = 0; break; - case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v8i8: Opc = DOpcodes[0]; break; + case MVT::v16i8: Opc = QOpcodes[0]; break; + case MVT::v4i16: Opc = DOpcodes[1]; break; + case MVT::v8i16: Opc = QOpcodes[1]; break; case MVT::v2f32: - case MVT::v2i32: OpcodeIndex = 2; break; + case MVT::v2i32: Opc = DOpcodes[2]; break; + case MVT::v4f32: + case MVT::v4i32: Opc = QOpcodes[2]; break; } SDValue Pred = getAL(CurDAG, dl); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SDValue SuperReg; - unsigned Opc = Opcodes[OpcodeIndex]; SmallVector<SDValue, 6> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); @@ -2287,6 +2321,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an 
explicit writeback // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + Opc = getVLDSTRegisterUpdateOpcode(Opc); if (!isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. @@ -2305,14 +2341,18 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, ResTys.push_back(MVT::Other); SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); - SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. - static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); - unsigned SubIdx = ARM::dsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + if (NumVecs == 1) { + ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); + } else { + SDValue SuperReg = SDValue(VLdDup, 0); + static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); + unsigned SubIdx = ARM::dsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + } ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); @@ -2612,6 +2652,10 @@ static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0, } bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) { + if (!Subtarget->hasV6Ops() || + (Subtarget->isThumb() && !Subtarget->hasThumb2())) + return false; + SDLoc dl(N); SDValue Src0 = N->getOperand(0); SDValue Src1 = N->getOperand(1); @@ -2687,6 +2731,87 @@ void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); } +static Optional<std::pair<unsigned, unsigned>> +getContiguousRangeOfSetBits(const APInt &A) { + unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; + unsigned LastOne = A.countTrailingZeros(); + if (A.countPopulation() != (FirstOne - LastOne + 1)) + return Optional<std::pair<unsigned,unsigned>>(); + return std::make_pair(FirstOne, LastOne); +} + +void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { + assert(N->getOpcode() == ARMISD::CMPZ); + SwitchEQNEToPLMI = false; + + if (!Subtarget->isThumb()) + // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and + // LSR don't exist as standalone instructions - they need the barrel shifter. + return; + + // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) + SDValue And = N->getOperand(0); + if (!And->hasOneUse()) + return; + + SDValue Zero = N->getOperand(1); + if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || + And->getOpcode() != ISD::AND) + return; + SDValue X = And.getOperand(0); + auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); + + if (!C || !X->hasOneUse()) + return; + auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); + if (!Range) + return; + + // There are several ways to lower this: + SDNode *NewN; + SDLoc dl(N); + + auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { + if (Subtarget->isThumb2()) { + Opc = (Opc == ARM::tLSLri) ? 
ARM::t2LSLri : ARM::t2LSRri; + SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } else { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, + CurDAG->getTargetConstant(Imm, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } + }; + + if (Range->second == 0) { + // 1. Mask includes the LSB -> Simply shift the top N bits off + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + ReplaceNode(And.getNode(), NewN); + } else if (Range->first == 31) { + // 2. Mask includes the MSB -> Simply shift the bottom N bits off + NewN = EmitShift(ARM::tLSRri, X, Range->second); + ReplaceNode(And.getNode(), NewN); + } else if (Range->first == Range->second) { + // 3. Only one bit is set. We can shift this into the sign bit and use a + // PL/MI comparison. + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + ReplaceNode(And.getNode(), NewN); + + SwitchEQNEToPLMI = true; + } else if (!Subtarget->hasV6T2Ops()) { + // 4. Do a double shift to clear bottom and top bits, but only in + // thumb-1 mode as in thumb-2 we can use UBFX. + NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); + NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), + Range->second + (31 - Range->first)); + ReplaceNode(And.getNode(), NewN); + } + +} + void ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); @@ -2761,9 +2886,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb1Only()) { // Set the alignment of the frame object to 4, to avoid having to generate // more than one ADD - MachineFrameInfo *MFI = MF->getFrameInfo(); - if (MFI->getObjectAlignment(FI) < 4) - MFI->setObjectAlignment(FI, 4); + MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.getObjectAlignment(FI) < 4) + MFI.setObjectAlignment(FI, 4); CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, CurDAG->getTargetConstant(0, dl, MVT::i32)); return; @@ -2914,6 +3039,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } } + break; } case ARMISD::VMOVRRD: @@ -2971,7 +3097,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::UMLAL:{ // UMAAL is similar to UMLAL but it adds two 32-bit values to the // 64-bit multiplication result. - if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC && + if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && + N->getOperand(2).getOpcode() == ARMISD::ADDC && N->getOperand(3).getOpcode() == ARMISD::ADDE) { SDValue Addc = N->getOperand(2); @@ -3037,6 +3164,37 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } } + case ARMISD::SUBE: { + if (!Subtarget->hasV6Ops()) + break; + // Look for a pattern to match SMMLS + // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) + if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || + N->getOperand(2).getOpcode() != ARMISD::SUBC || + !SDValue(N, 1).use_empty()) + break; + + if (Subtarget->isThumb()) + assert(Subtarget->hasThumb2() && + "This pattern should not be generated for Thumb"); + + SDValue SmulLoHi = N->getOperand(1); + SDValue Subc = N->getOperand(2); + auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); + + if (!Zero || Zero->getZExtValue() != 0 || + Subc.getOperand(1) != SmulLoHi.getValue(0) || + N->getOperand(1) != SmulLoHi.getValue(1) || + N->getOperand(2) != Subc.getValue(1)) + break; + + unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; + SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), + N->getOperand(0), getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32) }; + ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); + return; + } case ISD::LOAD: { if (Subtarget->isThumb() && Subtarget->hasThumb2()) { if (tryT2IndexedLoad(N)) @@ -3073,9 +3231,27 @@ void ARMDAGToDAGISel::Select(SDNode *N) { assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), dl, - MVT::i32); + unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); + + if (InFlag.getOpcode() == ARMISD::CMPZ) { + bool SwitchEQNEToPLMI; + SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); + InFlag = N->getOperand(4); + + if (SwitchEQNEToPLMI) { + switch ((ARMCC::CondCodes)CC) { + default: llvm_unreachable("CMPZ must be either NE or EQ!"); + case ARMCC::NE: + CC = (unsigned)ARMCC::MI; + break; + case ARMCC::EQ: + CC = (unsigned)ARMCC::PL; + break; + } + } + } + + SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, MVT::Glue, Ops); @@ -3089,6 +3265,80 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } + + case ARMISD::CMPZ: { + // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) + // This allows us to avoid materializing the expensive negative constant. + // The CMPZ #0 is useless and will be peepholed away but we need to keep it + // for its glue output. + SDValue X = N->getOperand(0); + auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); + if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { + int64_t Addend = -C->getSExtValue(); + + SDNode *Add = nullptr; + // In T2 mode, ADDS can be better than CMN if the immediate fits in a + // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. + // Outside that range we can just use a CMN which is 32-bit but has a + // 12-bit immediate range. + if (Subtarget->isThumb2() && Addend < 1<<8) { + SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); + } else if (!Subtarget->isThumb2() && Addend < 1<<8) { + // FIXME: Add T1 tADDi8 code. + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops); + } else if (!Subtarget->isThumb2() && Addend < 1<<3) { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops); + } + if (Add) { + SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; + CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); + } + } + // Other cases are autogenerated. 
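Both CMPZ rewrites in this hunk are easier to see on concrete values. For the AND-mask form in SelectCMPZ, take an assumed mask C = 0x0FF0: its set bits run contiguously from bit 11 down to bit 4 (FirstOne = 11, LastOne = 4), so on a pre-v6T2 Thumb1 target case 4 applies and the test becomes a flag-setting shift pair. A standalone sketch of that arithmetic, illustrative only:

  #include <cstdint>

  // (X & 0x0FF0) == 0 tested with two shifts: LSLS discards the bits above
  // the run, LSRS discards the bits below it.
  uint32_t maskTest0FF0(uint32_t X) {
    uint32_t T = X << (31 - 11);  // LSLS X, #20
    return T >> ((31 - 11) + 4);  // LSRS T, #24
  }

The result is zero exactly when X & 0x0FF0 is, and the Thumb LSLS/LSRS update the flags for free. The negative-immediate rewrite is plain arithmetic by comparison: CMPZ(X, #-42) becomes ADDS X, #42 feeding a CMPZ against #0, since 42 fits the 8-bit immediate range checked above while materializing -42 would cost extra instructions.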
+ break; + } + + case ARMISD::CMOV: { + SDValue InFlag = N->getOperand(4); + + if (InFlag.getOpcode() == ARMISD::CMPZ) { + bool SwitchEQNEToPLMI; + SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); + + if (SwitchEQNEToPLMI) { + SDValue ARMcc = N->getOperand(2); + ARMCC::CondCodes CC = + (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + + switch (CC) { + default: llvm_unreachable("CMPZ must be either NE or EQ!"); + case ARMCC::NE: + CC = ARMCC::MI; + break; + case ARMCC::EQ: + CC = ARMCC::PL; + break; + } + SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); + SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, + N->getOperand(3), N->getOperand(4)}; + CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); + } + + } + // Other cases are autogenerated. + break; + } + case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -3174,6 +3424,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case ARMISD::VLD1DUP: { + static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, + ARM::VLD1DUPd32 }; + static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, + ARM::VLD1DUPq32 }; + SelectVLDDup(N, false, 1, DOpcodes, QOpcodes); + return; + } + case ARMISD::VLD2DUP: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, ARM::VLD2DUPd32 }; @@ -3197,6 +3456,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case ARMISD::VLD1DUP_UPD: { + static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, + ARM::VLD1DUPd16wb_fixed, + ARM::VLD1DUPd32wb_fixed }; + static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, + ARM::VLD1DUPq16wb_fixed, + ARM::VLD1DUPq32wb_fixed }; + SelectVLDDup(N, true, 1, DOpcodes, QOpcodes); + return; + } + case ARMISD::VLD2DUP_UPD: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, @@ -4383,7 +4653,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, case InlineAsm::Constraint_i: // FIXME: It seems strange that 'i' is needed here since it's supposed to // be an immediate and not a memory constraint. - // Fallthrough. + LLVM_FALLTHROUGH; case InlineAsm::Constraint_m: case InlineAsm::Constraint_o: case InlineAsm::Constraint_Q: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 3cfcb1e09f0b..afba1587a743 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -59,18 +60,27 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); +STATISTIC(NumConstpoolPromoted, + "Number of constants with their storage promoted into constant pools"); static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); -// Disabled for causing self-hosting failures once returned-attribute inference -// was enabled. 
-static cl::opt<bool> -EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden, - cl::desc("Directly forward this return"), - cl::init(false)); +static cl::opt<bool> EnableConstpoolPromotion( + "arm-promote-constant", cl::Hidden, + cl::desc("Enable / disable promotion of unnamed_addr constants into " + "constant pools"), + cl::init(true)); +static cl::opt<unsigned> ConstpoolPromotionMaxSize( + "arm-promote-constant-max-size", cl::Hidden, + cl::desc("Maximum size of constant to promote into a constant pool"), + cl::init(64)); +static cl::opt<unsigned> ConstpoolPromotionMaxTotal( + "arm-promote-constant-max-total", cl::Hidden, + cl::desc("Maximum size of ALL constants to promote into a constant pool"), + cl::init(128)); namespace { class ARMCCState : public CCState { @@ -87,6 +97,171 @@ namespace { }; } +void ARMTargetLowering::InitLibcallCallingConvs() { + // The builtins on ARM always use AAPCS, irrespective of whether C is AAPCS or + // AAPCS_VFP. + for (const auto LC : { + RTLIB::SHL_I16, + RTLIB::SHL_I32, + RTLIB::SHL_I64, + RTLIB::SHL_I128, + RTLIB::SRL_I16, + RTLIB::SRL_I32, + RTLIB::SRL_I64, + RTLIB::SRL_I128, + RTLIB::SRA_I16, + RTLIB::SRA_I32, + RTLIB::SRA_I64, + RTLIB::SRA_I128, + RTLIB::MUL_I8, + RTLIB::MUL_I16, + RTLIB::MUL_I32, + RTLIB::MUL_I64, + RTLIB::MUL_I128, + RTLIB::MULO_I32, + RTLIB::MULO_I64, + RTLIB::MULO_I128, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, + RTLIB::SDIV_I32, + RTLIB::SDIV_I64, + RTLIB::SDIV_I128, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, + RTLIB::UDIV_I32, + RTLIB::UDIV_I64, + RTLIB::UDIV_I128, + RTLIB::SREM_I8, + RTLIB::SREM_I16, + RTLIB::SREM_I32, + RTLIB::SREM_I64, + RTLIB::SREM_I128, + RTLIB::UREM_I8, + RTLIB::UREM_I16, + RTLIB::UREM_I32, + RTLIB::UREM_I64, + RTLIB::UREM_I128, + RTLIB::SDIVREM_I8, + RTLIB::SDIVREM_I16, + RTLIB::SDIVREM_I32, + RTLIB::SDIVREM_I64, + RTLIB::SDIVREM_I128, + RTLIB::UDIVREM_I8, + RTLIB::UDIVREM_I16, + RTLIB::UDIVREM_I32, + RTLIB::UDIVREM_I64, + RTLIB::UDIVREM_I128, + RTLIB::NEG_I32, + RTLIB::NEG_I64, + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::FPEXT_F64_F128, + RTLIB::FPEXT_F32_F128, + RTLIB::FPEXT_F32_F64, + RTLIB::FPEXT_F16_F32, + RTLIB::FPROUND_F32_F16, + RTLIB::FPROUND_F64_F16, + RTLIB::FPROUND_F80_F16, + RTLIB::FPROUND_F128_F16, + RTLIB::FPROUND_F64_F32, + RTLIB::FPROUND_F80_F32, + RTLIB::FPROUND_F128_F32, + RTLIB::FPROUND_F80_F64, + RTLIB::FPROUND_F128_F64, + RTLIB::FPTOSINT_F32_I32, + RTLIB::FPTOSINT_F32_I64, + RTLIB::FPTOSINT_F32_I128, + RTLIB::FPTOSINT_F64_I32, + RTLIB::FPTOSINT_F64_I64, + RTLIB::FPTOSINT_F64_I128, + RTLIB::FPTOSINT_F80_I32, + RTLIB::FPTOSINT_F80_I64, + RTLIB::FPTOSINT_F80_I128, + RTLIB::FPTOSINT_F128_I32, + RTLIB::FPTOSINT_F128_I64, + RTLIB::FPTOSINT_F128_I128, + RTLIB::FPTOUINT_F32_I32, + RTLIB::FPTOUINT_F32_I64, + RTLIB::FPTOUINT_F32_I128, + RTLIB::FPTOUINT_F64_I32, + RTLIB::FPTOUINT_F64_I64, + RTLIB::FPTOUINT_F64_I128, + RTLIB::FPTOUINT_F80_I32, + RTLIB::FPTOUINT_F80_I64, + RTLIB::FPTOUINT_F80_I128, + RTLIB::FPTOUINT_F128_I32, + RTLIB::FPTOUINT_F128_I64, + RTLIB::FPTOUINT_F128_I128, + RTLIB::SINTTOFP_I32_F32, + RTLIB::SINTTOFP_I32_F64, + RTLIB::SINTTOFP_I32_F80, + RTLIB::SINTTOFP_I32_F128, + RTLIB::SINTTOFP_I64_F32, + RTLIB::SINTTOFP_I64_F64, + RTLIB::SINTTOFP_I64_F80,
+ RTLIB::SINTTOFP_I64_F128, + RTLIB::SINTTOFP_I128_F32, + RTLIB::SINTTOFP_I128_F64, + RTLIB::SINTTOFP_I128_F80, + RTLIB::SINTTOFP_I128_F128, + RTLIB::UINTTOFP_I32_F32, + RTLIB::UINTTOFP_I32_F64, + RTLIB::UINTTOFP_I32_F80, + RTLIB::UINTTOFP_I32_F128, + RTLIB::UINTTOFP_I64_F32, + RTLIB::UINTTOFP_I64_F64, + RTLIB::UINTTOFP_I64_F80, + RTLIB::UINTTOFP_I64_F128, + RTLIB::UINTTOFP_I128_F32, + RTLIB::UINTTOFP_I128_F64, + RTLIB::UINTTOFP_I128_F80, + RTLIB::UINTTOFP_I128_F128, + RTLIB::OEQ_F32, + RTLIB::OEQ_F64, + RTLIB::OEQ_F128, + RTLIB::UNE_F32, + RTLIB::UNE_F64, + RTLIB::UNE_F128, + RTLIB::OGE_F32, + RTLIB::OGE_F64, + RTLIB::OGE_F128, + RTLIB::OLT_F32, + RTLIB::OLT_F64, + RTLIB::OLT_F128, + RTLIB::OLE_F32, + RTLIB::OLE_F64, + RTLIB::OLE_F128, + RTLIB::OGT_F32, + RTLIB::OGT_F64, + RTLIB::OGT_F128, + RTLIB::UO_F32, + RTLIB::UO_F64, + RTLIB::UO_F128, + RTLIB::O_F32, + RTLIB::O_F64, + RTLIB::O_F128, + }) + setLibcallCallingConv(LC, CallingConv::ARM_AAPCS); +} + // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -103,7 +278,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, } MVT ElemTy = VT.getVectorElementType(); - if (ElemTy != MVT::i64 && ElemTy != MVT::f64) + if (ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -174,6 +349,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + InitLibcallCallingConvs(); + if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && @@ -565,8 +742,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); - setOperationAction(ISD::SETCC, MVT::v1i64, Expand); - setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source, and nor does // it have a FP_TO_[SU]INT instruction with a narrower destination than @@ -803,19 +978,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || - Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) { + Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || + Subtarget->isTargetWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; - setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); - setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); - setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); - setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); - setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); - setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); - setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); + for (const auto &LC : + {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32}) + setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv" + : "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows() + ? 
"__rt_sdiv64" + : "__aeabi_ldivmod"); + for (const auto &LC : + {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32}) + setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv" + : "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows() + ? "__rt_udiv64" + : "__aeabi_uldivmod"); setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); @@ -835,6 +1017,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIVREM, MVT::i32, Expand); } + if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT()) + for (auto &VT : {MVT::f32, MVT::f64}) + setOperationAction(ISD::FPOWI, VT, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); @@ -875,6 +1061,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. + // If target has DMB in thumb, Fences can be inserted. + if (Subtarget->hasDataBarrier()) + InsertFencesForAtomic = true; + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Subtarget->hasAnyDataBarrier() ? Custom : Expand); @@ -893,8 +1083,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the // Unordered/Monotonic case. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + if (!InsertFencesForAtomic) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + } } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -1177,7 +1369,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; - case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; @@ -1236,6 +1428,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; + case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -1246,6 +1439,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; + case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; @@ -1429,6 +1623,16 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, } } +CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, + bool isVarArg) const { + return CCAssignFnForNode(CC, false, isVarArg); +} + +CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, + bool isVarArg) const { + return 
CCAssignFnForNode(CC, true, isVarArg); +} + /// CCAssignFnForNode - Selects the correct CCAssignFn for the given /// CallingConvention. CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, @@ -1464,9 +1668,7 @@ SDValue ARMTargetLowering::LowerCallResult( SmallVector<CCValAssign, 16> RVLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext(), Call); - CCInfo.AnalyzeCallResult(Ins, - CCAssignFnForNode(CallConv, /* Return*/ true, - isVarArg)); + CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1474,7 +1676,7 @@ SDValue ARMTargetLowering::LowerCallResult( // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference - if (i == 0 && isThisReturn && EnableThisRetForwarding) { + if (i == 0 && isThisReturn) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); @@ -1627,9 +1829,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<CCValAssign, 16> ArgLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), Call); - CCInfo.AnalyzeCallOperands(Outs, - CCAssignFnForNode(CallConv, /* Return*/ false, - isVarArg)); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1864,7 +2064,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, auto *BB = CLI.CS->getParent(); bool PreferIndirect = Subtarget->isThumb() && MF.getFunction()->optForMinSize() && - std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) { + count_if(GV->users(), [&BB](const User *U) { return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; }) > 2; @@ -1880,10 +2080,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getNode( ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); - Callee = - DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 0, MachineMemOperand::MOInvariant); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + /* Alignment = */ 0, MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1977,7 +2178,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { - MF.getFrameInfo()->setHasTailCall(); + MF.getFrameInfo().setHasTailCall(); return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); } @@ -2060,9 +2261,9 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, /// incoming argument stack. 
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + unsigned Bytes = Arg.getValueSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); @@ -2094,9 +2295,9 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, return false; assert(FI != INT_MAX); - if (!MFI->isFixedObjectIndex(FI)) + if (!MFI.isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); + return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -2121,11 +2322,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. - // Do not sibcall optimize vararg calls unless the call site is not passing - // any arguments. - if (isVarArg && !Outs.empty()) - return false; - // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. @@ -2155,8 +2351,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check that the call results are passed in the same way. LLVMContext &C = *DAG.getContext(); if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, - CCAssignFnForNode(CalleeCC, true, isVarArg), - CCAssignFnForNode(CallerCC, true, isVarArg))) + CCAssignFnForReturn(CalleeCC, isVarArg), + CCAssignFnForReturn(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -2181,12 +2377,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); - CCInfo.AnalyzeCallOperands(Outs, - CCAssignFnForNode(CalleeCC, false, isVarArg)); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); @@ -2236,8 +2431,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); - return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, - isVarArg)); + return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); } static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, @@ -2288,8 +2482,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, *DAG.getContext(), Call); // Analyze outgoing return values. 
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, - isVarArg)); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector<SDValue, 4> RetOps; @@ -2537,7 +2730,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue CPAddr; - bool IsPositionIndependent = isPositionIndependent(); + bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { @@ -2595,16 +2788,17 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); - SDValue FuncTLVGet = - DAG.getLoad(MVT::i32, DL, Chain, DescAddr, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 4, MachineMemOperand::MONonTemporal | - MachineMemOperand::MOInvariant); + SDValue FuncTLVGet = DAG.getLoad( + MVT::i32, DL, Chain, DescAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + /* Alignment = */ 4, + MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); MachineFunction &F = DAG.getMachineFunction(); - MachineFrameInfo *MFI = F.getFrameInfo(); - MFI->setAdjustsStack(true); + MachineFrameInfo &MFI = F.getFrameInfo(); + MFI.setAdjustsStack(true); // TLS calls preserve all registers except those that absolutely must be // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be @@ -2801,12 +2995,171 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("bogus TLS model"); } +/// Return true if all users of V are within function F, looking through +/// ConstantExprs. +static bool allUsersAreInFunction(const Value *V, const Function *F) { + SmallVector<const User*,4> Worklist; + for (auto *U : V->users()) + Worklist.push_back(U); + while (!Worklist.empty()) { + auto *U = Worklist.pop_back_val(); + if (isa<ConstantExpr>(U)) { + for (auto *UU : U->users()) + Worklist.push_back(UU); + continue; + } + + auto *I = dyn_cast<Instruction>(U); + if (!I || I->getParent()->getParent() != F) + return false; + } + return true; +} + +/// Return true if all users of V are within some (any) function, looking through +/// ConstantExprs. In other words, are there any global constant users? +static bool allUsersAreInFunctions(const Value *V) { + SmallVector<const User*,4> Worklist; + for (auto *U : V->users()) + Worklist.push_back(U); + while (!Worklist.empty()) { + auto *U = Worklist.pop_back_val(); + if (isa<ConstantExpr>(U)) { + for (auto *UU : U->users()) + Worklist.push_back(UU); + continue; + } + + if (!isa<Instruction>(U)) + return false; + } + return true; +} + +// Return true if T is an integer, float or an array/vector of either. 
+static bool isSimpleType(Type *T) { + if (T->isIntegerTy() || T->isFloatingPointTy()) + return true; + Type *SubT = nullptr; + if (T->isArrayTy()) + SubT = T->getArrayElementType(); + else if (T->isVectorTy()) + SubT = T->getVectorElementType(); + else + return false; + return SubT->isIntegerTy() || SubT->isFloatingPointTy(); +} + +static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, + EVT PtrVT, SDLoc dl) { + // If we're creating a pool entry for a constant global with unnamed address, + // and the global is small enough, we can emit it inline into the constant pool + // to save ourselves an indirection. + // + // This is a win if the constant is only used in one function (so it doesn't + // need to be duplicated) or duplicating the constant wouldn't increase code + // size (implying the constant is no larger than 4 bytes). + const Function *F = DAG.getMachineFunction().getFunction(); + + // We rely on this decision to inline being idempotent and unrelated to the + // use-site. We know that if we inline a variable at one use site, we'll + // inline it elsewhere too (and reuse the constant pool entry). Fast-isel + // doesn't know about this optimization, so bail out if it's enabled, else + // we could decide to inline here (and thus never emit the GV) but require + // the GV from fast-isel generated code. + if (!EnableConstpoolPromotion || + DAG.getMachineFunction().getTarget().Options.EnableFastISel) + return SDValue(); + + auto *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar || !GVar->hasInitializer() || + !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || + !GVar->hasLocalLinkage()) + return SDValue(); + + // Ensure that we don't try and inline any type that contains pointers. If + // we inline a value that contains relocations, we move the relocations from + // .data to .text which is not ideal. + auto *Init = GVar->getInitializer(); + if (!isSimpleType(Init->getType())) + return SDValue(); + + // The constant islands pass can only really deal with alignment requests + // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote + // any type wanting greater alignment requirements than 4 bytes. We also + // can only promote constants that are multiples of 4 bytes in size or + // are paddable to a multiple of 4. Currently we only try and pad constants + // that are strings for simplicity. + auto *CDAInit = dyn_cast<ConstantDataArray>(Init); + unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); + unsigned Align = GVar->getAlignment(); + unsigned RequiredPadding = 4 - (Size % 4); + bool PaddingPossible = + RequiredPadding == 4 || (CDAInit && CDAInit->isString()); + if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize) + return SDValue(); + + unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // We can't bloat the constant pool too much, else the ConstantIslands pass + // may fail to converge. If we haven't promoted this global yet (it may have + // multiple uses), and promoting it would increase the constant pool size (Sz + // > 4), ensure we have space to do so up to MaxTotal.
+ if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) + if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= + ConstpoolPromotionMaxTotal) + return SDValue(); + + // This is only valid if all users are in a single function OR it has users + // in multiple functions but is no larger than a pointer. We also check if + // GVar has constant (non-ConstantExpr) users. If so, it essentially has its + // address taken. + if (!allUsersAreInFunction(GVar, F) && + !(Size <= 4 && allUsersAreInFunctions(GVar))) + return SDValue(); + + // We're going to inline this global. Pad it out if needed. + if (RequiredPadding != 4) { + StringRef S = CDAInit->getAsString(); + + SmallVector<uint8_t,16> V(S.size()); + std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); + while (RequiredPadding--) + V.push_back(0); + Init = ConstantDataArray::get(*DAG.getContext(), V); + } + + auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); + SDValue CPAddr = + DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4); + if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { + AFI->markGlobalAsPromotedToConstantPool(GVar); + AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + + PaddedSize - 4); + } + ++NumConstpoolPromoted; + return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const TargetMachine &TM = getTargetMachine(); + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->getBaseObject(); + bool IsRO = + (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) || + isa<Function>(GV); + + // promoteToConstantPool only if not generating XO text section + if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) + if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl)) + return V; + if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); @@ -2833,6 +3186,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; + } else if (Subtarget->isROPI() && IsRO) { + // PC-relative. + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); + return Result; + } else if (Subtarget->isRWPI() && !IsRO) { + // SB-relative.
+ ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue G = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); + SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G); + return Result; } // If we have T2 ops, we can materialize the address directly via movt/movw @@ -2854,6 +3224,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && + "ROPI/RWPI not currently supported for Darwin"); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); @@ -2880,6 +3252,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); assert(Subtarget->useMovt(DAG.getMachineFunction()) && "Windows on ARM expects to use movw/movt"); + assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && + "ROPI/RWPI not currently supported for Windows"); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const ARMII::TOF TargetFlags = @@ -3097,8 +3471,8 @@ SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, SDValue ArgValue2; if (NextVA.isMemLoc()) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); + MachineFrameInfo &MFI = MF.getFrameInfo(); + int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); @@ -3139,7 +3513,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // initialize stack frame. 
MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { @@ -3154,7 +3528,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, ArgOffset = -4 * (ARM::R4 - RBegin); auto PtrVT = getPointerTy(DAG.getDataLayout()); - int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); + int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector<SDValue, 4> MemOps; @@ -3200,7 +3574,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -3208,9 +3582,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( SmallVector<CCValAssign, 16> ArgLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), Prologue); - CCInfo.AnalyzeFormalArguments(Ins, - CCAssignFnForNode(CallConv, /* Return*/ false, - isVarArg)); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); SmallVector<SDValue, 16> ArgValues; SDValue ArgValue; @@ -3248,7 +3620,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( CCInfo.rewindByValRegsInfo(); int lastInsIndex = -1; - if (isVarArg && MFI->hasVAStart()) { + if (isVarArg && MFI.hasVAStart()) { unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); if (RegIdx != array_lengthof(GPRArgRegs)) ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); @@ -3278,7 +3650,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { - int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); + int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack( @@ -3370,8 +3742,8 @@ SDValue ARMTargetLowering::LowerFormalArguments( CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - FIOffset, true); + int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -3385,7 +3757,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( } // varargs - if (isVarArg && MFI->hasVAStart()) + if (isVarArg && MFI.hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); @@ -4122,15 +4494,15 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - if (Subtarget->isThumb2()) { - // Thumb2 uses a two-level jump. That is, it jumps into the jump table + if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { + // Thumb2 and ARMv8-M use a two-level jump. 
That is, it jumps into the jump table // which does another jump to the destination. This also makes it easier - // to translate it to TBB / TBH later. + // to translate it to TBB / TBH later (Thumb2 only). // FIXME: This might not work if the function is extremely large. return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } - if (isPositionIndependent()) { + if (isPositionIndependent() || Subtarget->isROPI()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); @@ -4320,8 +4692,8 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); @@ -4346,8 +4718,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const ARMBaseRegisterInfo &ARI = *static_cast<const ARMBaseRegisterInfo*>(RegInfo); MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setFrameAddressIsTaken(true); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful @@ -4520,6 +4892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -4530,15 +4903,23 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), - ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, - CCR, Cmp); + SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, + ARMcc, CCR, CmpLo); + + + SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue HiBigShift = Opc == ISD::SRA + ? 
DAG.getNode(Opc, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, VT)) + : DAG.getConstant(0, dl, VT); + SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, + ARMcc, CCR, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -4556,23 +4937,28 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, + ARMcc, CCR, CmpHi); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, - CCR, Cmp); + SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, + DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -4877,32 +5263,49 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); SDLoc dl(Op); + if (Op0.getValueType().getVectorElementType() == MVT::i64 && + (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { + // Special-case integer 64-bit equality comparisons. They aren't legal, + // but they can be lowered with a few vector instructions. + unsigned CmpElements = CmpVT.getVectorNumElements() * 2; + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements); + SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0); + SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1); + SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1, + DAG.getCondCode(ISD::SETEQ)); + SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp); + SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed); + Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged); + if (SetCCOpcode == ISD::SETNE) + Merged = DAG.getNOT(dl, Merged, CmpVT); + Merged = DAG.getSExtOrTrunc(Merged, dl, VT); + return Merged; + } + if (CmpVT.getVectorElementType() == MVT::i64) - // 64-bit comparisons are not legal. We've marked SETCC as non-Custom, - // but it's possible that our operands are 64-bit but our result is 32-bit. - // Bail in this case. + // 64-bit comparisons are not legal in general. 
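Aside: the 64-bit equality special case above never compares 64-bit lanes directly. It bitcasts to i32 lanes, compares those, then ANDs each lane's result with its VREV64-swapped partner, so a 64-bit lane reads all-ones only when both halves matched. One lane, in scalar form:

#include <cstdint>

// One v1i64 lane of the SETEQ lowering: two i32 lane compares (SETCC),
// combined so the lane is all-ones iff both halves are equal
// (AND of the compare with its VREV64 twin).
uint64_t lane_eq64(uint64_t a, uint64_t b) {
  uint32_t LoEq = (uint32_t(a) == uint32_t(b)) ? 0xFFFFFFFFu : 0u;
  uint32_t HiEq = (uint32_t(a >> 32) == uint32_t(b >> 32)) ? 0xFFFFFFFFu : 0u;
  uint32_t Both = LoEq & HiEq;          // Cmp & VREV64(Cmp), per half
  return (uint64_t(Both) << 32) | Both; // all-ones or all-zeros mask
  // SETNE is the same mask, NOTed (DAG.getNOT above).
}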
return SDValue(); if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: - case ISD::SETNE: Invert = true; // Fallthrough + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETOLT: - case ISD::SETLT: Swap = true; // Fallthrough + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETOLE: - case ISD::SETLE: Swap = true; // Fallthrough + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETUGE: Swap = true; // Fallthrough + case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; - case ISD::SETUGT: Swap = true; // Fallthrough + case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; - case ISD::SETUEQ: Invert = true; // Fallthrough + case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETONE: // Expand this to (OLT | OGT). TmpOp0 = Op0; @@ -4911,7 +5314,9 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); break; - case ISD::SETUO: Invert = true; // Fallthrough + case ISD::SETUO: + Invert = true; + LLVM_FALLTHROUGH; case ISD::SETO: // Expand this to (OLT | OGE). TmpOp0 = Op0; @@ -5168,11 +5573,28 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { - if (!ST->hasVFP3()) - return SDValue(); - bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + const APFloat &FPVal = CFP->getValueAPF(); + + // Prevent floating-point constants from using literal loads + // when execute-only is enabled. + if (ST->genExecuteOnly()) { + APInt INTVal = FPVal.bitcastToAPInt(); + SDLoc DL(CFP); + if (IsDouble) { + SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32); + SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32); + if (!ST->isLittle()) + std::swap(Lo, Hi); + return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi); + } else { + return DAG.getConstant(INTVal, DL, MVT::i32); + } + } + + if (!ST->hasVFP3()) + return SDValue(); // Use the default (constant pool) lowering for double constants when we have // an SP-only FPU @@ -5180,7 +5602,6 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try splatting with a VMOV.f32... - const APFloat &FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? 
ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { @@ -5325,7 +5746,7 @@ static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5376,7 +5797,7 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) { // want to check the low half and high half of the shuffle mask as if it were // the other case static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5411,7 +5832,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5446,7 +5867,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ // Requires similar checks to that of isVTRNMask with // respect the how results are returned. static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5476,7 +5897,7 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5517,7 +5938,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ // Requires similar checks to that of isVTRNMask with respect the how results // are returned. static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5550,7 +5971,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; @@ -5650,6 +6071,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatUndef.isAllOnesValue()) + return DAG.getUNDEF(VT); + if (SplatBitSize <= 64) { // Check if an immediate VMOV works. 
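Aside: the mask predicates touched above (isVREVMask and friends) now read the element width via getScalarSizeInBits, with no change in meaning. For reference, the shape isVREVMask accepts is element reversal inside fixed-width blocks; a standalone model, assuming the mask already has the right length:

#include <cstdio>
#include <vector>

// True when mask M (with -1 for undef lanes) reverses EltBits-wide
// elements inside every BlockBits-wide block, i.e. a VREV<BlockBits>.
bool isVREVMaskModel(const std::vector<int> &M, unsigned EltBits,
                     unsigned BlockBits) {
  unsigned BlockElts = BlockBits / EltBits;
  for (unsigned i = 0; i < M.size(); ++i) {
    if (M[i] < 0)
      continue; // undef lane matches anything
    if (unsigned(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  // VREV64.32 on a v4i32: swap within each 64-bit block.
  printf("%d\n", isVREVMaskModel({1, 0, 3, 2}, 32, 64)); // 1
  printf("%d\n", isVREVMaskModel({0, 1, 2, 3}, 32, 64)); // 0
}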
EVT VmovVT; @@ -5732,7 +6156,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. @@ -5811,6 +6235,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return shuffle; } + if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) { + // If we haven't found an efficient lowering, try splitting a 128-bit vector + // into two 64-bit vectors; we might discover a better way to lower it. + SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts); + EVT ExtVT = VT.getVectorElementType(); + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2); + SDValue Lower = + DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2)); + if (Lower.getOpcode() == ISD::BUILD_VECTOR) + Lower = LowerBUILD_VECTOR(Lower, DAG, ST); + SDValue Upper = DAG.getBuildVector( + HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2)); + if (Upper.getOpcode() == ISD::BUILD_VECTOR) + Upper = LowerBUILD_VECTOR(Upper, DAG, ST); + if (Lower && Upper) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper); + } + // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands // will be legalized. @@ -5896,7 +6338,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - auto Source = std::find(Sources.begin(), Sources.end(), SourceVec); + auto Source = find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); @@ -5920,7 +6362,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SmallestEltTy = SrcEltTy; } unsigned ResMultiplier = - VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits(); + VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); @@ -6006,13 +6448,13 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // The stars all align, our next step is to produce the mask for the shuffle. SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1); - int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits(); + int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); if (Entry.isUndef()) continue; - auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0)); + auto Src = find(Sources, Entry.getOperand(0)); int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit @@ -6020,7 +6462,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // segment. 
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); int BitsDefined = std::min(OrigEltTy.getSizeInBits(), - VT.getVectorElementType().getSizeInBits()); + VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, @@ -6080,7 +6522,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, bool ReverseVEXT, isV_UNDEF; unsigned Imm, WhichResult; - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); return (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || @@ -6223,7 +6665,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of the same time so that they get CSEd properly. ArrayRef<int> ShuffleMask = SVN->getMask(); - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); @@ -6309,7 +6751,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT SubVT = SubV1.getValueType(); // We expect these to have been canonicalized to -1. - assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) { + assert(all_of(ShuffleMask, [&](int i) { return i < (int)VT.getVectorNumElements(); }) && "Unexpected shuffle index into UNDEF operand!"); @@ -6397,8 +6839,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDValue Vec = Op.getOperand(0); - if (Op.getValueType() == MVT::i32 && - Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { + if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) { SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } @@ -6463,7 +6904,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfSize = EltSize / 2; if (isSigned) { if (!isIntN(HalfSize, C->getSExtValue())) @@ -6590,7 +7031,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { // Construct a new BUILD_VECTOR with elements truncated to half the size. assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); - unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; + unsigned EltSize = VT.getScalarSizeInBits() / 2; unsigned NumElts = VT.getVectorNumElements(); MVT TruncVT = MVT::getIntegerVT(EltSize); SmallVector<SDValue, 8> Ops; @@ -6915,7 +7356,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); auto PtrVT = getPointerTy(DAG.getDataLayout()); - MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. @@ -6929,7 +7370,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { // Create stack object for sret. 
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); ArgListEntry Entry; @@ -7029,6 +7470,19 @@ SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); } +static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) { + SDLoc DL(N); + SDValue Op = N->getOperand(1); + if (N->getValueType(0) == MVT::i32) + return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, + DAG.getConstant(0, DL, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, + DAG.getConstant(1, DL, MVT::i32)); + return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, + DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi)); +} + void ARMTargetLowering::ExpandDIV_Windows( SDValue Op, SelectionDAG &DAG, bool Signed, SmallVectorImpl<SDValue> &Results) const { @@ -7039,14 +7493,7 @@ void ARMTargetLowering::ExpandDIV_Windows( "unexpected type for custom lowering DIV"); SDLoc dl(Op); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), - DAG.getConstant(0, dl, MVT::i32)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), - DAG.getConstant(1, dl, MVT::i32)); - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi); - - SDValue DBZCHK = - DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or); + SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode()); SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); @@ -7132,11 +7579,66 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N, Results.push_back(SDValue(CmpSwap, 2)); } +static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, + SelectionDAG &DAG) { + const auto &TLI = DAG.getTargetLoweringInfo(); + + assert(Subtarget.getTargetTriple().isOSMSVCRT() && + "Custom lowering is MSVCRT specific!"); + + SDLoc dl(Op); + SDValue Val = Op.getOperand(0); + MVT Ty = Val->getSimpleValueType(0); + SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1)); + SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow", + TLI.getPointerTy(DAG.getDataLayout())); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + Entry.Node = Val; + Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isZExt = true; + Args.push_back(Entry); + + Entry.Node = Exponent; + Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isZExt = true; + Args.push_back(Entry); + + Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); + + // The in-chain to the call is the entry node. If we are emitting a + // tailcall, the chain will be mutated if the node has a non-entry input + // chain.
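Aside: LowerFPOWI routes FPOWI to plain powf/pow rather than a powi helper (the assert pins it to MSVCRT targets, which do not provide the usual compiler-rt __powisf2/__powidf2 route), converting the integer exponent with SINT_TO_FP. In scalar terms the transformation is just:

#include <cmath>

// What the FPOWI lowering computes on Windows/MSVCRT targets: convert
// the integer exponent and defer to the libm pow family. The call is
// emitted as a tail call when the position allows, per the code below.
float  powi_f32(float  Val, int Exp) { return std::pow(Val, (float)Exp); }
double powi_f64(double Val, int Exp) { return std::pow(Val, (double)Exp); }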
+ SDValue InChain = DAG.getEntryNode(); + SDValue TCChain = InChain; + + const auto *F = DAG.getMachineFunction().getFunction(); + bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && + F->getReturnType() == LCRTy; + if (IsTC) + InChain = TCChain; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(InChain) + .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args)) + .setTailCall(IsTC); + std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI); + + // Return the chain (the DAG root) if it is a tail call + return !CI.second.getNode() ? DAG.getRoot() : CI.first; +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::ConstantPool: + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); + return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: switch (Subtarget->getTargetTriple().getObjectFormat()) { @@ -7218,6 +7720,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Don't know how to custom lower this!"); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } @@ -7278,6 +7781,8 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { + assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && + "ROPI/RWPI not currently supported with SjLj"); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); @@ -7396,8 +7901,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); - MachineFrameInfo *MFI = MF->getFrameInfo(); - int FI = MFI->getFunctionContextIndex(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + int FI = MFI.getFunctionContextIndex(); const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass : &ARM::GPRnopcRegClass; @@ -7406,7 +7911,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // associated with. 
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; unsigned MaxCSNum = 0; - MachineModuleInfo &MMI = MF->getMMI(); for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { if (!BB->isEHPad()) continue; @@ -7418,9 +7922,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, if (!II->isEHLabel()) continue; MCSymbol *Sym = II->getOperand(0).getMCSymbol(); - if (!MMI.hasCallSiteLandingPad(Sym)) continue; + if (!MF->hasCallSiteLandingPad(Sym)) continue; - SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); + SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym); for (SmallVectorImpl<unsigned>::iterator CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); CSI != CSE; ++CSI) { @@ -7491,8 +7995,10 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); // Add a register mask with no preserved registers. This results in all - // registers being marked as clobbered. - MIB.addRegMask(RI.getNoPreservedMask()); + // registers being marked as clobbered. This can't work if the dispatch block + // is in a Thumb1 function and is linked with ARM code which uses the FP + // registers, as there is no way to preserve the FP registers in Thumb1 mode. + MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF)); bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); @@ -7911,6 +8417,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); + bool IsThumb = Subtarget->isThumb(); if (Align & 1) { UnitSize = 1; @@ -7932,7 +8439,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // Select the correct opcode and register class for unit size load/store bool IsNeon = UnitSize >= 8; - TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass; + TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass; if (IsNeon) VecTRC = UnitSize == 16 ? &ARM::DPairRegClass : UnitSize == 8 ? &ARM::DPRRegClass @@ -8014,12 +8521,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), + TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16), + TII->get(IsThumb ? 
ARM::t2MOVTi16 : ARM::MOVTi16), varEnd) .addReg(Vtmp) .addImm(LoopSize >> 16)); @@ -8034,7 +8541,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - if (IsThumb1) + if (IsThumb) AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( varEnd, RegState::Define).addConstantPoolIndex(Idx)); else @@ -8201,17 +8708,20 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, ContBB->splice(ContBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); ContBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(ContBB); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0)); MF->push_back(TrapBB); - BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249); MBB->addSuccessor(TrapBB); - BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ)) - .addReg(MI.getOperand(0).getReg()) - .addMBB(TrapBB); - AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB)); - MBB->addSuccessor(ContBB); + AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) + .addReg(MI.getOperand(0).getReg()) + .addImm(0)); + BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc)) + .addMBB(TrapBB) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR); MI.eraseFromParent(); return ContBB; @@ -8635,7 +9145,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, // (zext cc) can never be the all ones value. if (AllOnes) return false; - // Fall through. + LLVM_FALLTHROUGH; case ISD::SIGN_EXTEND: { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -8962,7 +9472,8 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, // be combined into a UMLAL. The other pattern is AddcNode being combined // into an UMLAL and then using another addc is handled in ISelDAGToDAG. - if (!Subtarget->hasV6Ops()) + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || + (Subtarget->isThumb() && !Subtarget->hasThumb2())) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); SDNode *PrevAddc = nullptr; @@ -9964,6 +10475,7 @@ static SDValue CombineBaseUpdate(SDNode *N, isLaneOp = true; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode for Neon base update"); + case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break; case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -10078,8 +10590,8 @@ static SDValue CombineBaseUpdate(SDNode *N, StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal); } - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, - Ops, AlignedVecTy, + EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy; + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT, MemN->getMemOperand()); // Update the uses. @@ -10211,19 +10723,44 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return SDValue(); // Make sure the VMOV element size is not bigger than the VDUPLANE elements. - unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); + unsigned EltSize = Op.getScalarValueSizeInBits(); // The canonical VMOV for a zero vector uses a 32-bit element size. 
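Aside: the tightened UMAAL guard above (V6 ops plus DSP, and not Thumb1) matches where the instruction actually exists. The combine itself is sound because UMAAL's result can never overflow 64 bits; stated as scalar code:

#include <cstdint>

// UMAAL: RdHi:RdLo = Rn * Rm + RdHi + RdLo. The maximum value,
// (2^32-1)^2 + 2*(2^32-1) = 2^64 - 1, still fits in 64 bits, which is
// why two 32-bit addends can be folded into one long multiply.
uint64_t umaal(uint32_t Rn, uint32_t Rm, uint32_t RdLo, uint32_t RdHi) {
  return (uint64_t)Rn * Rm + RdLo + RdHi;
}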
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); unsigned EltBits; if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) EltSize = 8; EVT VT = N->getValueType(0); - if (EltSize > VT.getVectorElementType().getSizeInBits()) + if (EltSize > VT.getScalarSizeInBits()) return SDValue(); return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } +/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP. +static SDValue PerformVDUPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + + // Match VDUP(LOAD) -> VLD1DUP. + // We match this pattern here rather than waiting for isel because the + // transform is only legal for unindexed loads. + LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()); + if (LD && Op.hasOneUse() && LD->isUnindexed() && + LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { + SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1), + DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) }; + SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, + Ops, LD->getMemoryVT(), + LD->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1)); + return VLDDup; + } + + return SDValue(); +} + static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); @@ -10255,8 +10792,8 @@ static SDValue PerformSTORECombine(SDNode *N, EVT StVT = St->getMemoryVT(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); - unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + unsigned FromEltSz = VT.getScalarSizeInBits(); + unsigned ToEltSz = StVT.getScalarSizeInBits(); // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); @@ -10524,7 +11061,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// 0 <= Value <= ElementBits for a long left shift. static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getScalarSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); @@ -10539,7 +11076,7 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getScalarSizeInBits(); if (! 
getVShiftImm(Op, ElementBits, Cnt)) return false; if (!isIntrinsic) @@ -11051,6 +11588,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); + case ARMISD::VDUP: return PerformVDUPCombine(N, DCI); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI.DAG, Subtarget); @@ -11066,6 +11604,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); + case ARMISD::VLD1DUP: case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: @@ -11234,6 +11773,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { return true; } +int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + if (isLegalAddressingMode(DL, AM, Ty, AS)) { + if (Subtarget->hasFPAO()) + return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster + return 0; + } + return -1; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) @@ -11384,7 +11934,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, case 1: if (Subtarget->isThumb1Only()) return false; - // FALL THROUGH. + LLVM_FALLTHROUGH; default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) @@ -11682,7 +12232,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); - unsigned MemBits = VT.getScalarType().getSizeInBits(); + unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } @@ -12043,7 +12593,7 @@ static RTLIB::Libcall getDivRemLibcall( } static TargetLowering::ArgListTy getDivRemArgList( - const SDNode *N, LLVMContext *Context) { + const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemArgList"); @@ -12060,12 +12610,15 @@ static TargetLowering::ArgListTy getDivRemArgList( Entry.isZExt = !isSigned; Args.push_back(Entry); } + if (Subtarget->isTargetWindows() && Args.size() >= 2) + std::swap(Args[0], Args[1]); return Args; } SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || - Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) && + Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || + Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && @@ -12073,20 +12626,42 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + SDLoc dl(Op); + + // If the target has hardware divide, use divide + multiply + subtract: + // div = a / b + // rem = a - b * div + // return {div, rem} + // This should be lowered into UDIV/SDIV + MLS later on. 
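Aside: the comment above states the whole expansion; restated as scalar code, one hardware division plus a multiply-subtract (later selected as MLS) yields both results of {s,u}divrem without an __aeabi_uidivmod call:

#include <cstdint>

// div = a / b; rem = a - b * div. The divisor is assumed non-zero here;
// on Windows the WIN__DBZCHK guard below takes care of the zero case.
void udivrem32(uint32_t a, uint32_t b, uint32_t &q, uint32_t &r) {
  q = a / b;      // UDIV
  r = a - b * q;  // MUL + SUB, folded to MLS during selection
}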
+ if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && + Op->getSimpleValueType(0) == MVT::i32) { + unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + const SDValue Dividend = Op->getOperand(0); + const SDValue Divisor = Op->getOperand(1); + SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor); + SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor); + SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); + + SDValue Values[2] = {Div, Rem}; + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values); + } RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(), VT.getSimpleVT().SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(), - DAG.getContext()); + DAG.getContext(), + Subtarget); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); - SDLoc dl(Op); + if (Subtarget->isTargetWindows()) + InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) @@ -12119,11 +12694,15 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT(). SimpleTy); SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext()); + TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(), + Subtarget); bool isSigned = N->getOpcode() == ISD::SREM; SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); + if (Subtarget->isTargetWindows()) + InChain = WinDBZCheckDenominator(DAG, N, InChain); + // Lower call CallLoweringInfo CLI(DAG); CLI.setChain(InChain) @@ -12342,6 +12921,14 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } +bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; + + return (Index == 0 || Index == ResVT.getVectorNumElements()); +} + Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -12443,7 +13030,8 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return (Size <= (Subtarget->isMClass() ? 32U : 64U)) + bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } @@ -12455,7 +13043,9 @@ bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. 
- return getTargetMachine().getOptLevel() != 0; + bool hasAtomicCmpXchg = + !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg; } bool ARMTargetLowering::shouldInsertFencesForAtomic( @@ -12681,6 +13271,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, /// /// Note that the new shufflevectors will be removed and we'll only generate one /// vst3 instruction in CodeGen. +/// +/// Example for a more general valid mask (Factor 3). Lower: +/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1, +/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> +/// store <12 x i32> %i.vec, <12 x i32>* %ptr +/// +/// Into: +/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7> +/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35> +/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19> +/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { @@ -12691,9 +13292,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, assert(VecTy->getVectorNumElements() % Factor == 0 && "Invalid interleaved store"); - unsigned NumSubElts = VecTy->getVectorNumElements() / Factor; + unsigned LaneLen = VecTy->getVectorNumElements() / Factor; Type *EltTy = VecTy->getVectorElementType(); - VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); + VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); @@ -12720,7 +13321,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); - SubVecTy = VectorType::get(IntTy, NumSubElts); + SubVecTy = VectorType::get(IntTy, LaneLen); } static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, @@ -12736,9 +13337,28 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SI->getModule(), StoreInts[Factor - 2], Tys); // Split the shufflevector operands into sub vectors for the new vstN call. - for (unsigned i = 0; i < Factor; i++) - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts))); + auto Mask = SVI->getShuffleMask(); + for (unsigned i = 0; i < Factor; i++) { + if (Mask[i] >= 0) { + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); + } else { + unsigned StartMask = 0; + for (unsigned j = 1; j < LaneLen; j++) { + if (Mask[j*Factor + i] >= 0) { + StartMask = Mask[j*Factor + i] - j; + break; + } + } + // Note: If all elements in a chunk are undefs, StartMask=0! + // Note: Filling undef gaps with random elements is ok, since + // those elements were being written anyway (with undefs). 
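Aside: the chunk-start selection implemented in this loop, restated on plain arrays. For sub-vector i, the first defined lane fixes the sequential start index (Mask[j*Factor+i] - j); an all-undef chunk falls back to 0, which is safe because those lanes were being stored as undef anyway. Assumes Mask has Factor * LaneLen entries:

#include <vector>

// One start index per sub-vector of an interleaved store, mirroring the
// vstN operand construction above.
std::vector<int> chunkStartMasks(const std::vector<int> &Mask,
                                 unsigned Factor, unsigned LaneLen) {
  std::vector<int> Starts(Factor, 0);
  for (unsigned i = 0; i < Factor; ++i) {
    if (Mask[i] >= 0) {
      Starts[i] = Mask[i];
      continue;
    }
    for (unsigned j = 1; j < LaneLen; ++j) {
      if (Mask[j * Factor + i] >= 0) {
        Starts[i] = Mask[j * Factor + i] - j; // never negative for a valid
        break;                                // re-interleave mask
      }
    }
  }
  return Starts;
}

For the Factor-3 example in the doc comment above, this yields starts {4, 32, 16}: exactly the bases of the three shufflevectors shown there.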
+ // In the case of all undefs we're defaulting to using elems from 0 + // Note: StartMask cannot be negative, it's checked in isReInterleaveMask + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); + } + } Ops.push_back(Builder.getInt32(SI->getAlignment())); Builder.CreateCall(VstNFunc, Ops); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 4906686616bc..5255d82d647a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -190,7 +190,8 @@ namespace llvm { MEMCPY, // Vector load N-element structure to all lanes: - VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, + VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, + VLD2DUP, VLD3DUP, VLD4DUP, @@ -202,6 +203,7 @@ namespace llvm { VLD2LN_UPD, VLD3LN_UPD, VLD4LN_UPD, + VLD1DUP_UPD, VLD2DUP_UPD, VLD3DUP_UPD, VLD4DUP_UPD, @@ -291,6 +293,14 @@ namespace llvm { /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + + /// getScalingFactorCost - Return the cost of the scaling used in + /// addressing mode represented by AM. + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, the return value must be negative. + int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; + bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -421,6 +431,10 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + /// Return true if EXTRACT_SUBVECTOR is cheap for this result type + /// with this index. + bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override; + /// \brief Returns true if an argument of type Ty needs to be passed in a /// contiguous block of registers in calling convention CallConv. bool functionArgumentNeedsConsecutiveRegisters( @@ -482,6 +496,9 @@ namespace llvm { return HasStandaloneRem; } + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; + CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -506,6 +523,8 @@ namespace llvm { bool HasStandaloneRem = true; + void InitLibcallCallingConvs(); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 37a83f70a1fb..488439fc24e0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -398,6 +398,14 @@ class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, list<Predicate> Predicates = [IsThumb]; } +// PseudoInst that's in ARMv8-M baseline (Somewhere between Thumb and Thumb2) +class t2basePseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, + list<dag> pattern> + : PseudoInst<oops, iops, itin, pattern> { + let Size = sz; + list<Predicate> Predicates = [IsThumb,HasV8MBaseline]; +} + // PseudoInst that's Thumb2-mode only. 
class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, list<dag> pattern> @@ -999,6 +1007,15 @@ class VFPPat<dag pattern, dag result> : Pat<pattern, result> { class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP]; } +class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb2, HasDSP]; +} +class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP]; +} +class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack]; +} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 98b1b4ca4272..27b64322dfa9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -123,7 +123,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + auto Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); MIB.addMemOperand(MMO); diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index c9735f3ec277..c47393990e97 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -330,6 +330,8 @@ def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -358,7 +360,23 @@ def imm16_31 : ImmLeaf<i32, [{ // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ - return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; + if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17) + return true; + + if (N->getOpcode() != ISD::SRA) + return false; + if (N->getOperand(0).getOpcode() != ISD::SHL) + return false; + + auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + return true; }]>; /// Split a 32-bit immediate into two 16 bit parts. 
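Aside: the extended sext_16_node predicate above additionally recognises the canonical shl-16/sra-16 idiom, since (sra (shl x, 16), 16) leaves a value with at least 17 sign bits. In C terms:

#include <cstdint>

// The DAG shape the predicate now matches: keep bits [15:0] of x and
// sign-extend them. (Arithmetic >> on a negative int is implementation-
// defined in older C++ standards, but is the intended two's-complement
// behaviour here, as on all ARM targets.)
int32_t sext16(uint32_t x) {
  return int32_t(x << 16) >> 16;
}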
@@ -3400,6 +3418,12 @@ def SXTAB : AI_exta_rrot<0b01101010, def SXTAH : AI_exta_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; +def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), + (SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), + i16)), + (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; + def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; @@ -3427,6 +3451,11 @@ def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; def UXTAH : AI_exta_rrot<0b01101111, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; + +def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), + (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), + (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. @@ -3471,6 +3500,7 @@ def UBFX : I<(outs GPRnopc:$Rd), // Arithmetic Instructions. // +let isAdd = 1 in defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", @@ -3486,9 +3516,11 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", // FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen // support for an optional CPSR definition that corresponds to the DAG // node's second value. We can then eliminate the implicit def of CPSR. +let isAdd = 1 in defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>; defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>; +let isAdd = 1 in defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>; @@ -5492,45 +5524,22 @@ def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULBB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULTB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; - -def : ARMV5MOPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5MOPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; - // Pre-v7 uses MCR for synchronization barriers. 
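Aside: the new ARMV6Pat selections above match an add whose second operand is a shifted-then-extended field, which is exactly what SXTAB/SXTAH/UXTAB/UXTAH compute. A scalar reading of two of them, with the rotation modelled as the plain right shift used in the patterns:

#include <cstdint>

// SXTAB: Rn + sign-extended byte of (Rm >> rot).
uint32_t sxtab(uint32_t Rn, uint32_t Rm, unsigned rot) {
  return Rn + uint32_t(int32_t(int8_t(Rm >> rot)));
}

// UXTAH: Rn + zero-extended halfword of (Rm >> rot).
uint32_t uxtah(uint32_t Rn, uint32_t Rm, unsigned rot) {
  return Rn + ((Rm >> rot) & 0xFFFFu);
}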
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index defef4ea9073..b5fa8e999e2a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -610,14 +610,14 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ def VLDMQIA : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 DPair:$src), GPR:$Rn)]>; + [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -6849,6 +6849,16 @@ let Predicates = [IsBE] in { def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; } +// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>; + // Fold extracting an element out of a v2i32 into a vfp register. 
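The big-endian patterns above avoid VLDM/VSTM, which assume word alignment, and instead go through VLD1/VST1 with a VREV64 lane swap when a v2f64 access is only byte- or halfword-aligned. A rough illustration using Clang's vector extension; this is an assumed example, and the exact codegen depends on the target and on the alignment the compiler can prove:

#include <cstring>

typedef double f64x2 __attribute__((vector_size(16)));

// A 16-byte vector read through a pointer with no alignment guarantee
// becomes a byte_alignedload of v2f64; on big-endian ARM this now
// selects VLD1.8 plus VREV64.8 rather than a mis-aligned VLDM.
f64x2 loadUnaligned(const char *p) {
  f64x2 v;
  std::memcpy(&v, p, sizeof v);
  return v;
}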
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index 93a174f3678a..a681f64b05e6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -904,49 +904,51 @@ class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin, let Inst{7-0} = imm8; } -// Add with carry register -let isCommutable = 1, Uses = [CPSR] in -def tADC : // A8.6.2 - T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, - "adc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; - -// Add immediate -def tADDi3 : // A8.6.4 T1 - T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), - IIC_iALUi, - "add", "\t$Rd, $Rm, $imm3", - [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>, - Sched<[WriteALU]> { - bits<3> imm3; - let Inst{8-6} = imm3; -} - -def tADDi8 : // A8.6.4 T2 - T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), - (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, - "add", "\t$Rdn, $imm8", - [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>, - Sched<[WriteALU]>; +let isAdd = 1 in { + // Add with carry register + let isCommutable = 1, Uses = [CPSR] in + def tADC : // A8.6.2 + T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, + "adc", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; + + // Add immediate + def tADDi3 : // A8.6.4 T1 + T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), + IIC_iALUi, + "add", "\t$Rd, $Rm, $imm3", + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>, + Sched<[WriteALU]> { + bits<3> imm3; + let Inst{8-6} = imm3; + } -// Add register -let isCommutable = 1 in -def tADDrr : // A8.6.6 T1 - T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), - IIC_iALUr, - "add", "\t$Rd, $Rn, $Rm", - [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; - -let hasSideEffects = 0 in -def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, - "add", "\t$Rdn, $Rm", []>, - T1Special<{0,0,?,?}>, Sched<[WriteALU]> { - // A8.6.6 T2 - bits<4> Rdn; - bits<4> Rm; - let Inst{7} = Rdn{3}; - let Inst{6-3} = Rm; - let Inst{2-0} = Rdn{2-0}; + def tADDi8 : // A8.6.4 T2 + T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), + (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, + "add", "\t$Rdn, $imm8", + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>, + Sched<[WriteALU]>; + + // Add register + let isCommutable = 1 in + def tADDrr : // A8.6.6 T1 + T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iALUr, + "add", "\t$Rd, $Rn, $Rm", + [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; + + let hasSideEffects = 0 in + def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, + "add", "\t$Rdn, $Rm", []>, + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { + // A8.6.6 T2 + bits<4> Rdn; + bits<4> Rm; + let Inst{7} = Rdn{3}; + let Inst{6-3} = Rm; + let Inst{2-0} = Rdn{2-0}; + } } // AND register @@ -1259,6 +1261,13 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", let Inst{7-0} = imm8; } +def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0", + [(int_arm_undefined 249)]>, Encoding16, + Requires<[IsThumb, IsWindows]> { + let Inst = 0xdef9; + let isTerminator = 1; +} + // Zero-extend byte def tUXTB : // A8.6.262 T1pIMiscEncode<{0,0,1,0,1,1,?}, 
(outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1306,6 +1315,18 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>, Sched<[WriteALU]>; +// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them +// and make use of the same compressed jump table format as Thumb-2. +let Size = 2 in { +def tTBB_JT : tPseudoInst<(outs), + (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, + Sched<[WriteBr]>; + +def tTBH_JT : tPseudoInst<(outs), + (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, + Sched<[WriteBr]>; +} + //===----------------------------------------------------------------------===// // TLS Instructions // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index db8b9fb923bf..603d66403e65 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -536,9 +536,9 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin, } class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + string opc, list<dag> pattern> + : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, + opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -552,10 +552,11 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, let Inst{7-4} = opc7_4; let Inst{3-0} = Rm; } -class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { +class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc> + : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -1983,12 +1984,19 @@ def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; // A simple right-shift can also be used in most cases (the exception is the // SXTH operations with a rotate of 24: there the non-contiguous bits are // relevant). 
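The tTBB_JT/tTBH_JT pseudos above fire when a dense switch lowers to a jump table: Thumb-1 lacks the TBB/TBH encodings, so the pseudos expand to an equivalent add-to-pc sequence over the same compressed byte or halfword table. A minimal, illustrative trigger:

// A dense switch typically becomes a jump table; with these pseudos a
// Thumb-1 target (e.g. v6-M) can share Thumb-2's compressed format.
int dispatch(int op) {
  switch (op) {
  case 0: return 11;
  case 1: return 22;
  case 2: return 33;
  case 3: return 44;
  case 4: return 55;
  default: return -1;
  }
}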
-def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), - (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, - Requires<[HasT2ExtractPack, IsThumb2]>; -def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), - (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, - Requires<[HasT2ExtractPack, IsThumb2]>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg + (srl rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg + (srl rGPR:$Rm, imm8_or_16:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg + (rotr rGPR:$Rm, (i32 24)), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg + (or (srl rGPR:$Rm, (i32 24)), + (shl rGPR:$Rm, (i32 8))), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; // Zero extenders @@ -2017,12 +2025,12 @@ def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; -def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), - (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, - Requires<[HasT2ExtractPack, IsThumb2]>; -def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), - (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, - Requires<[HasT2ExtractPack, IsThumb2]>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), + 0xFF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), + 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; } @@ -2030,6 +2038,7 @@ def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), // Arithmetic Instructions. 
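The two extra Thumb2ExtractPat entries cover the non-contiguous case called out in the comment: after a rotate right by 24, bits [7:0] of the source sit above bits [31:24], which no plain right shift can express. In C++ terms, a sketch (the rotate may appear directly or as the shift/or idiom shown in the second pattern):

// (x >> 24) | (x << 8) is a rotate right by 24; sign-extending its low
// 16 bits and accumulating matches the new pattern and selects
// sxtah Rd, Rn, Rm, ror #24.
int accRor24(int acc, unsigned x) {
  return acc + (short)((x >> 24) | (x << 8));
}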
// +let isAdd = 1 in defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>; defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>; @@ -2546,367 +2555,194 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLA: T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>, - Requires<[IsThumb2, UseMulOps]> { +class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern> + : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; - let Inst{7-4} = 0b0000; // Multiply + let Inst{7-4} = op7_4; } -def t2MLS: T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b000; - let Inst{7-4} = 0b0001; // Multiply and Subtract -} +def t2MLA : T2FourRegMLA<0b0000, "mla", + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), + rGPR:$Ra))]>; +def t2MLS: T2FourRegMLA<0b0001, "mls", + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, + rGPR:$Rm)))]>; // Extra precision multiplies with low / high results let hasSideEffects = 0 in { let isCommutable = 1 in { -def t2SMULL : T2MulLong<0b000, 0b0000, - (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>; - -def t2UMULL : T2MulLong<0b010, 0b0000, - (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>; +def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>; +def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>; } // isCommutable // Multiply + accumulate -def t2SMLAL : T2MlaLong<0b100, 0b0000, - (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; - -def t2UMLAL : T2MlaLong<0b110, 0b0000, - (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; - -def t2UMAAL : T2MulLong<0b110, 0b0110, - (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, - "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, - Requires<[IsThumb2, HasDSP]>; +def t2SMLAL : T2MlaLong<0b100, 0b0000, "smlal">; +def t2UMLAL : T2MlaLong<0b110, 0b0000, "umlal">; +def t2UMAAL : T2MlaLong<0b110, 0b0110, "umaal">, Requires<[IsThumb2, HasDSP]>; } // hasSideEffects // Rounding variants of the below included for disassembly only // Most significant word multiply -def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, - "smmul", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b101; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) -} - -def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, - "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { 
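The T2MulLong/T2MlaLong/T2FourRegMLA rework is mechanical: per-instruction encodings collapse into parameterized classes while the selection patterns are unchanged. Those patterns correspond to familiar source shapes; hypothetical examples:

// With UseMulOps, multiply-accumulate and multiply-subtract select
// t2MLA and t2MLS; the signed high-half multiply selects t2SMMUL.
int mla(int a, int b, int acc) { return acc + a * b; }
int mls(int a, int b, int acc) { return acc - a * b; }
int smmul(int a, int b) {
  return (int)(((long long)a * b) >> 32); // high 32 bits of the product
}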
+class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern> + : T2ThreeReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, + opc, "\t$Rd, $Rn, $Rm", pattern>, + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) + let Inst{7-4} = op7_4; } +def t2SMMUL : T2SMMUL<0b0000, "smmul", [(set rGPR:$Rd, (mulhs rGPR:$Rn, + rGPR:$Rm))]>; +def t2SMMULR : T2SMMUL<0b0001, "smmulr", []>; -def t2SMMLA : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, +class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc, + list<dag> pattern> + : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b101; - let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) -} - -def t2SMMLAR: T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b101; - let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) -} - -def t2SMMLS: T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b110; - let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) -} - -def t2SMMLSR:T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b110; - let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; } -multiclass T2I_smul<string opc, SDNode opnode> { - def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b00; - } - - def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b01; - } - - def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b10; - } +def t2SMMLA 
: T2FourRegSMMLA<0b101, 0b0000, "smmla", + [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>; +def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>; +def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>; +def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>; - def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b11; - } - - def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b011; - let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b00; - } - - def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, - !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - []>, - Requires<[IsThumb2, HasDSP]> { +class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc, + list<dag> pattern> + : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc, + "\t$Rd, $Rn, $Rm", pattern>, + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b011; + let Inst{22-20} = op22_20; let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) let Inst{7-6} = 0b00; - let Inst{5-4} = 0b01; - } -} - - -multiclass T2I_smla<string opc, SDNode opnode> { - def BB : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, - (opnode (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b00; - } - - def BT : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b01; - } - - def TB : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + let Inst{5-4} = op5_4; +} + +def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb", + [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), + (sext_inreg rGPR:$Rm, i16)))]>; +def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt", + [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), + (sra rGPR:$Rm, (i32 16))))]>; +def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb", + [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), + (sext_inreg rGPR:$Rm, i16)))]>; +def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt", + [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), + (sra rGPR:$Rm, (i32 16))))]>; +def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>; +def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, 
"smulwt", []>; + +def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), + (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; + +class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, + list<dag> pattern> + : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, + opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; + let Inst{22-20} = op22_20; let Inst{7-6} = 0b00; - let Inst{5-4} = 0b10; - } - - def TT : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b001; - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b11; - } - - def WB : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b011; - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b00; - } - - def WT : T2FourReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, - !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0110; - let Inst{22-20} = 0b011; - let Inst{7-6} = 0b00; - let Inst{5-4} = 0b01; - } -} - -defm t2SMUL : T2I_smul<"smul", mul>; -defm t2SMLA : T2I_smla<"smla", mul>; + let Inst{5-4} = op5_4; +} + +def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb", + [(set rGPR:$Rd, (add rGPR:$Ra, + (mul (sext_inreg rGPR:$Rn, i16), + (sext_inreg rGPR:$Rm, i16))))]>; +def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt", + [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16), + (sra rGPR:$Rm, (i32 16)))))]>; +def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", + [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), + (sext_inreg rGPR:$Rm, i16))))]>; +def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", + [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), + (sra rGPR:$Rm, (i32 16)))))]>; +def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>; +def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>; + +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), + (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, + (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))), + (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, + (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), + (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; + +class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> + : T2FourReg_mac<1, op22_20, op7_4, + (outs rGPR:$Ra, rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasDSP]>; // Halfword multiple accumulate long: SMLAL<x><y> -def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - 
Requires<[IsThumb2, HasDSP]>; -def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasDSP]>; - -// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -def t2SMUAD: T2ThreeReg_mac< - 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{15-12} = 0b1111; -} -def t2SMUADX:T2ThreeReg_mac< - 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { +def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>; +def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>; +def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>; +def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>; + +class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc> + : T2ThreeReg_mac<0, op22_20, op7_4, + (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } -def t2SMUSD: T2ThreeReg_mac< - 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{15-12} = 0b1111; -} -def t2SMUSDX:T2ThreeReg_mac< - 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{15-12} = 0b1111; -} -def t2SMLAD : T2FourReg_mac< - 0, 0b010, 0b0000, (outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", - "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLADX : T2FourReg_mac< - 0, 0b010, 0b0001, (outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx", - "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd", - "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx", - "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald", - "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx", - "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld", - "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; -def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", - "\t$Ra, $Rd, $Rn, $Rm", []>, - 
Requires<[IsThumb2, HasDSP]>; + +// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">; +def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">; +def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">; +def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">; + +class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc> + : T2FourReg_mac<0, op22_20, op7_4, + (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasDSP]>; + +def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">; +def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">; +def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">; +def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">; + +class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc> + : T2FourReg_mac<1, op22_20, op7_4, + (outs rGPR:$Ra, rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasDSP]>; + +def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">; +def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; +def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">; +def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; //===----------------------------------------------------------------------===// // Division Instructions. @@ -3545,7 +3381,9 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br, } let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { -def t2BR_JT : t2PseudoInst<(outs), + +// available in both v8-M.Baseline and Thumb2 targets +def t2BR_JT : t2basePseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt), 0, IIC_Br, [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>, @@ -3645,6 +3483,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>, Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> { bits<4> func; @@ -3753,6 +3592,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", // Secure Monitor Call is a system instruction. // Option = Inst{19-16} +let isCall = 1, Uses = [SP] in def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; @@ -3809,6 +3649,7 @@ def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>; def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; // Return From Exception is a system instruction. 
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern>, @@ -4568,7 +4409,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; def : t2InstAlias<"ldr${p} $Rt, $addr", - (t2LDRpci GPRnopc:$Rt, t2ldrlabel:$addr, pred:$p)>; + (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrb${p} $Rt, $addr", (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p} $Rt, $addr", diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index e29d265ae3d1..e99048645685 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -624,7 +624,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", - [(set DPR:$Dd, (fextend SPR:$Sm))]> { + [(set DPR:$Dd, (fpextend SPR:$Sm))]> { // Instruction operands. bits<5> Dd; bits<5> Sm; @@ -641,7 +641,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (fround DPR:$Dm))]> { + [(set SPR:$Sd, (fpround DPR:$Dm))]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -838,7 +838,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, } } -defm VCVTA : vcvt_inst<"a", 0b00, frnd>; +defm VCVTA : vcvt_inst<"a", 0b00, fround>; defm VCVTN : vcvt_inst<"n", 0b01>; defm VCVTP : vcvt_inst<"p", 0b10, fceil>; defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; @@ -938,7 +938,7 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm, Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; +defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>; defm VRINTN : vrint_inst_anpm<"n", 0b01>; defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>; defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp new file mode 100644 index 000000000000..2bdbe4fca3de --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -0,0 +1,109 @@ +//===- ARMInstructionSelector.cpp ----------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for ARM. +/// \todo This should be generated by TableGen. 
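Among the renames in the VFP hunk above, fextend, fround and frnd become fpextend, fpround and fround, following the updated names of the generic FP_EXTEND/FP_ROUND/FROUND pattern fragments. The conversions themselves are the ordinary ones; for example:

// fpextend: float -> double, selects VCVTDS.
double widen(float x) { return x; }

// fpround: double -> float, selects VCVTSD.
float narrow(double x) { return static_cast<float>(x); }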
+//===----------------------------------------------------------------------===// + +#include "ARMInstructionSelector.h" +#include "ARMRegisterBankInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "arm-isel" + +using namespace llvm; + +#ifndef LLVM_BUILD_GLOBAL_ISEL +#error "You shouldn't build this" +#endif + +ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI) + : InstructionSelector(), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI) {} + +static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + unsigned DstReg = I.getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + return true; + + const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); + (void)RegBank; + assert(RegBank && "Can't get reg bank for virtual register"); + + const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + (void)DstSize; + unsigned SrcReg = I.getOperand(1).getReg(); + const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); + (void)SrcSize; + assert((DstSize == SrcSize || + // Copies are a means to setup initial types, the number of + // bits may not exactly match. + (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + DstSize <= SrcSize)) && + "Copy with different width?!"); + + assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank"); + const TargetRegisterClass *RC = &ARM::GPRRegClass; + + // No need to constrain SrcReg. It will get constrained when + // we hit another of its uses or its defs. + // Copies do not have constraints. + if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { + DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) + << " operand\n"); + return false; + } + return true; +} + +bool ARMInstructionSelector::select(MachineInstr &I) const { + assert(I.getParent() && "Instruction should be in a basic block!"); + assert(I.getParent()->getParent() && "Instruction should be in a function!"); + + auto &MBB = *I.getParent(); + auto &MF = *MBB.getParent(); + auto &MRI = MF.getRegInfo(); + + if (!isPreISelGenericOpcode(I.getOpcode())) { + if (I.isCopy()) + return selectCopy(I, TII, MRI, TRI, RBI); + + return true; + } + + MachineInstrBuilder MIB{MF, I}; + + using namespace TargetOpcode; + switch (I.getOpcode()) { + case G_ADD: + I.setDesc(TII.get(ARM::ADDrr)); + AddDefaultCC(AddDefaultPred(MIB)); + break; + case G_FRAME_INDEX: + // Add 0 to the given frame index and hope it will eventually be folded into + // the user(s). + I.setDesc(TII.get(ARM::ADDri)); + AddDefaultCC(AddDefaultPred(MIB.addImm(0))); + break; + case G_LOAD: + I.setDesc(TII.get(ARM::LDRi12)); + AddDefaultPred(MIB.addImm(0)); + break; + default: + return false; + } + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h new file mode 100644 index 000000000000..5072cdd60ce4 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h @@ -0,0 +1,39 @@ +//===- ARMInstructionSelector ------------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the InstructionSelector class for ARM. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H +#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H + +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" + +namespace llvm { +class ARMBaseInstrInfo; +class ARMBaseRegisterInfo; +class ARMBaseTargetMachine; +class ARMRegisterBankInfo; +class ARMSubtarget; + +class ARMInstructionSelector : public InstructionSelector { +public: + ARMInstructionSelector(const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); + + virtual bool select(MachineInstr &I) const override; + +private: + const ARMBaseInstrInfo &TII; + const ARMBaseRegisterInfo &TRI; + const ARMRegisterBankInfo &RBI; +}; + +} // End llvm namespace. +#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp new file mode 100644 index 000000000000..255ea4bc7198 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -0,0 +1,44 @@ +//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the Machinelegalizer class for ARM. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "ARMLegalizerInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Target/TargetOpcodes.h" + +using namespace llvm; + +#ifndef LLVM_BUILD_GLOBAL_ISEL +#error "You shouldn't build this" +#endif + +ARMLegalizerInfo::ARMLegalizerInfo() { + using namespace TargetOpcode; + + const LLT p0 = LLT::pointer(0, 32); + + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + + setAction({G_FRAME_INDEX, p0}, Legal); + + setAction({G_LOAD, s32}, Legal); + setAction({G_LOAD, 1, p0}, Legal); + + for (auto Ty : {s8, s16, s32}) + setAction({G_ADD, Ty}, Legal); + + computeTables(); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h new file mode 100644 index 000000000000..ca3eea81271b --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -0,0 +1,29 @@ +//===- ARMLegalizerInfo ------------------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the Machinelegalizer class for ARM. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class LLVMContext; + +/// This class provides the information for the target register banks. 
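The initial ARMLegalizerInfo above is deliberately tiny: pointer-typed G_FRAME_INDEX, 32-bit G_LOAD, and s8/s16/s32 G_ADD are the only operations marked Legal. In source terms that covers simple additions like the following sketch (modulo the usual C++ integer promotions; anything outside these rules is not yet supported on this path):

signed char add8(signed char a, signed char b) { return a + b; }
short       add16(short a, short b)            { return a + b; }
int         add32(int a, int b)                { return a + b; }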
+class ARMLegalizerInfo : public LegalizerInfo { +public: + ARMLegalizerInfo(); +}; +} // End llvm namespace. +#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 62d57f3f4986..48ab491b5be9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -95,12 +95,10 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { - return ARM_LOAD_STORE_OPT_NAME; - } + StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; } private: /// A set of load/store MachineInstrs with same base register sorted by @@ -562,7 +560,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator Before) { // Initialize if we never queried in this block. if (!LiveRegsValid) { - LiveRegs.init(TRI); + LiveRegs.init(*TRI); LiveRegs.addLiveOuts(MBB); LiveRegPos = MBB.end(); LiveRegsValid = true; @@ -834,7 +832,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { assert(MO.isImplicit()); unsigned DefReg = MO.getReg(); - if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) + if (is_contained(ImpDefs, DefReg)) continue; // We can ignore cases where the super-reg is read and written. if (MI->readsRegister(DefReg)) @@ -1851,7 +1849,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - if (MBBI != MBB.begin() && + if (MBBI != MBB.begin() && MBBI != MBB.end() && (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::MOVPCLR)) { @@ -1953,7 +1951,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; - const char *getPassName() const override { + StringRef getPassName() const override { return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 7429acdb09ad..293a527b09e8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -21,6 +21,12 @@ #include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -85,6 +91,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex())); break; case MachineOperand::MO_ConstantPoolIndex: + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex())); break; case MachineOperand::MO_BlockAddress: @@ -93,7 +101,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, case MachineOperand::MO_FPImmediate: { APFloat Val = MO.getFPImm()->getValueAPF(); bool ignored; - Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); MCOp = MCOperand::createFPImm(Val.convertToDouble()); break; } @@ -150,3 +158,106 @@ 
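The is_contained change above is a drop-in replacement for the std::find(...) != end() idiom; llvm::is_contained comes from llvm/ADT/STLExtras.h. A minimal usage sketch (hypothetical function, not from the patch):

#include "llvm/ADT/STLExtras.h"
#include <vector>

// Equivalent to: std::find(Regs.begin(), Regs.end(), Reg) != Regs.end()
bool definesReg(const std::vector<unsigned> &Regs, unsigned Reg) {
  return llvm::is_contained(Regs, Reg);
}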
void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, } } } + +void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) +{ + if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>() + ->isThumbFunction()) + { + MI.emitError("An attempt to perform XRay instrumentation for a" + " Thumb function (not supported). Detected when emitting a sled."); + return; + } + static const int8_t NoopsInSledCount = 6; + // We want to emit the following pattern: + // + // .Lxray_sled_N: + // ALIGN + // B #20 + // ; 6 NOP instructions (24 bytes) + // .tmpN + // + // We need the 24 bytes (6 instructions) because at runtime, we'd be patching + // over the full 28 bytes (7 instructions) with the following pattern: + // + // PUSH{ r0, lr } + // MOVW r0, #<lower 16 bits of function ID> + // MOVT r0, #<higher 16 bits of function ID> + // MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit> + // MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit> + // BLX ip + // POP{ r0, lr } + // + OutStreamer->EmitCodeAlignment(4); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->EmitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Emit "B #20" instruction, which jumps over the next 24 bytes (because + // register pc is 8 bytes ahead of the jump instruction by the moment CPU + // is executing it). + // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|. + // It is not clear why |addReg(0)| is needed (the last operand). + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20) + .addImm(ARMCC::AL).addReg(0)); + + MCInst Noop; + Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + for (int8_t I = 0; I < NoopsInSledCount; I++) + { + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } + + OutStreamer->EmitLabel(Target); + recordSled(CurSled, MI, Kind); +} + +void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_ENTER); +} + +void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::FUNCTION_EXIT); +} + +void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) +{ + EmitSled(MI, SledKind::TAIL_CALL); +} + +void ARMAsmPrinter::EmitXRayTable() +{ + if (Sleds.empty()) + return; + + MCSection *Section = nullptr; + if (Subtarget->isTargetELF()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP | + ELF::SHF_MERGE, + 0, CurrentFnSym->getName()); + } else if (Subtarget->isTargetMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + OutStreamer->SwitchSection(Section); + for (const auto &Sled : Sleds) { + OutStreamer->EmitSymbolValue(Sled.Sled, 4); + OutStreamer->EmitSymbolValue(CurrentFnSym, 4); + auto Kind = static_cast<uint8_t>(Sled.Kind); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast<const char *>(&Kind), 1)); + OutStreamer->EmitBytes( + StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1)); + OutStreamer->EmitZeros(6); + } + OutStreamer->SwitchSection(PrevSection); + + Sleds.clear(); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index b6dee9ff8385..50d8f0941460 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp 
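EmitXRayTable above writes one fixed 16-byte record per sled into the xray_instr_map section. A C++ struct mirroring that layout, as read off the emission code (an assumption for illustration, not an LLVM type):

#include <cstdint>

struct XRaySledRecord32 {
  uint32_t Sled;             // EmitSymbolValue(Sled.Sled, 4)
  uint32_t Function;         // EmitSymbolValue(CurrentFnSym, 4)
  uint8_t  Kind;             // one byte from Sled.Kind
  uint8_t  AlwaysInstrument; // one byte from Sled.AlwaysInstrument
  uint8_t  Padding[6];       // EmitZeros(6)
};
static_assert(sizeof(XRaySledRecord32) == 16, "matches the emitted record");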
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -11,14 +11,14 @@ using namespace llvm; -void ARMFunctionInfo::anchor() { } +void ARMFunctionInfo::anchor() {} ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()), - StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), - RestoreSPFromFP(false), LRSpilledForFarJump(false), + StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0), + HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), - ArgumentStackSize(0), IsSplitCSR(false) {} + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0), + VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0), + IsSplitCSR(false), PromotedGlobalsIncrease(0) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index f71497240ff3..8c485e89bf54 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -121,6 +121,12 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// copies. bool IsSplitCSR; + /// Globals that have had their storage promoted into the constant pool. + SmallPtrSet<const GlobalVariable*,2> PromotedGlobals; + + /// The amount the literal pool has been increased by due to promoted globals. + int PromotedGlobalsIncrease; + public: ARMFunctionInfo() : isThumb(false), @@ -131,7 +137,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false), + PromotedGlobalsIncrease(0) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -226,6 +233,22 @@ public: } return It; } + + /// Indicate to the backend that \c GV has had its storage changed to inside + /// a constant pool. This means it no longer needs to be emitted as a + /// global variable.
+ void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV) { + PromotedGlobals.insert(GV); + } + SmallPtrSet<const GlobalVariable*, 2>& getGlobalsPromotedToConstantPool() { + return PromotedGlobals; + } + int getPromotedConstpoolIncrease() const { + return PromotedGlobalsIncrease; + } + void setPromotedConstpoolIncrease(int Sz) { + PromotedGlobalsIncrease = Sz; + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 73dcb9641b61..581d5fe159fd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -29,12 +29,10 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { - return "optimise barriers pass"; - } + StringRef getPassName() const override { return "optimise barriers pass"; } }; char ARMOptimizeBarriersPass::ID = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp new file mode 100644 index 000000000000..9bd036a1eace --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -0,0 +1,127 @@ +//===- ARMRegisterBankInfo.cpp -----------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for ARM. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "ARMRegisterBankInfo.h" +#include "ARMInstrInfo.h" // For the register classes +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#ifndef LLVM_BUILD_GLOBAL_ISEL +#error "You shouldn't build this" +#endif + +// FIXME: TableGen this. +// If it grows too much and TableGen still isn't ready to do the job, extract it +// into an ARMGenRegisterBankInfo.def (similar to AArch64). +namespace llvm { +namespace ARM { +RegisterBank GPRRegBank; +RegisterBank *RegBanks[] = {&GPRRegBank}; + +RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank}; + +RegisterBankInfo::ValueMapping ValueMappings[] = { + {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}}; +} // end namespace arm +} // end namespace llvm + +ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) + : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) { + static bool AlreadyInit = false; + // We have only one set of register banks, whatever the subtarget + // is. Therefore, the initialization of the RegBanks table should be + // done only once. Indeed the table of all register banks + // (ARM::RegBanks) is unique in the compiler. At some point, it + // will get tablegen'ed and the whole constructor becomes empty. + if (AlreadyInit) + return; + AlreadyInit = true; + + // Initialize the GPR bank. 
+ createRegisterBank(ARM::GPRRegBankID, "GPRB"); + + addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRRegClassID, TRI); + addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRwithAPSRRegClassID, TRI); + const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID); + (void)RBGPR; + assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnopcRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::rGPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) && + "Subclass not added?"); + assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit"); +} + +const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( + const TargetRegisterClass &RC) const { + using namespace ARM; + + switch (RC.getID()) { + case GPRRegClassID: + case tGPR_and_tcGPRRegClassID: + return getRegBank(ARM::GPRRegBankID); + default: + llvm_unreachable("Unsupported register kind"); + } + + llvm_unreachable("Switch should handle all register classes"); +} + +RegisterBankInfo::InstructionMapping +ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { + auto Opc = MI.getOpcode(); + + // Try the default logic for non-generic instructions that are either copies + // or already have some operands assigned to banks. + if (!isPreISelGenericOpcode(Opc)) { + InstructionMapping Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; + } + + using namespace TargetOpcode; + + unsigned NumOperands = MI.getNumOperands(); + const ValueMapping *OperandsMapping = &ARM::ValueMappings[0]; + + switch (Opc) { + case G_ADD: + case G_LOAD: + // FIXME: We're abusing the fact that everything lives in a GPR for now; in + // the real world we would use different mappings. + OperandsMapping = &ARM::ValueMappings[0]; + break; + case G_FRAME_INDEX: + OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr}); + break; + default: + return InstructionMapping{}; + } + + return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping, + NumOperands}; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h new file mode 100644 index 000000000000..773920ee57a7 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h @@ -0,0 +1,41 @@ +//===- ARMRegisterBankInfo ---------------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for ARM. +/// \todo This should be generated by TableGen. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" + +namespace llvm { + +class TargetRegisterInfo; + +namespace ARM { +enum { + GPRRegBankID = 0, // General purpose registers + NumRegisterBanks, +}; +} // end namespace ARM + +/// This class provides the information for the target register banks. +class ARMRegisterBankInfo final : public RegisterBankInfo { +public: + ARMRegisterBankInfo(const TargetRegisterInfo &TRI); + + const RegisterBank & + getRegBankFromRegClass(const TargetRegisterClass &RC) const override; + + InstructionMapping getInstrMapping(const MachineInstr &MI) const override; +}; +} // End llvm namespace. +#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 47a99313025c..b7d2d34614df 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -364,3 +364,4 @@ include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" include "ARMScheduleSwift.td" +include "ARMScheduleR52.td" diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td new file mode 100644 index 000000000000..1b40742a093b --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -0,0 +1,983 @@ +//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The Cortex-R52 is an in-order pipelined superscalar microprocessor with +// an 8-stage pipeline. It can issue at most two instructions per cycle. +// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV. +// A number of forwarding paths enable results of computations to be input +// to subsequent operations before they are written to registers. +// This scheduler is a MachineScheduler. See TargetSchedule.td for details. + +def CortexR52Model : SchedMachineModel { + let MicroOpBufferSize = 0; // R52 is an in-order processor + let IssueWidth = 2; // 2 micro-ops dispatched per cycle + let LoadLatency = 1; // Optimistic, assuming no misses + let MispredictPenalty = 8; // A branch direction mispredict, including PFU + let PostRAScheduler = 1; // Enable PostRA scheduler pass. + let CompleteModel = 0; // Covers instructions applicable to cortex-r52. +} + + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Model each pipeline as a ProcResource with BufferSize = 0, since +// Cortex-R52 is an in-order processor.
+ +def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store +def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch +def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU +def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL +def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV + +// Cortex-R52 specific SchedReads +def R52Read_ISS : SchedRead; +def R52Read_EX1 : SchedRead; +def R52Read_EX2 : SchedRead; +def R52Read_WRI : SchedRead; +def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe +def R52Read_F1 : SchedRead; +def R52Read_F2 : SchedRead; + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which map ProcResources and set latency. + +let SchedModel = CortexR52Model in { + +// ALU - Write occurs in Late EX2 (independent of whether shift was required) +def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; } +def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; } +def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; } +def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; } + +// Compares +def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; } +def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; } +def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; } + +// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2) +def : WriteRes<WriteDiv, [R52UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; // not pipelined +} + +// Loads +def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; } +def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; } + +// Branches - LR written in Late EX2 +def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; } +def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; } +def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; } + +// Misc +def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } +def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; } + +def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage +def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedReadWrites. 
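// (Editorial aside, not part of the committed patch: write latency and
// ReadAdvance compose by subtraction. Given WriteALU's latency of 3 and the
// ReadAdvance<ReadALU, 1> above, a dependent ALU instruction observes an
// effective latency of 3 - 1 = 2 cycles, which is how the EX1 forwarding path
// is modeled without spelling out the bypass network.)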
+ +// Forwarding information - based on when an operand is read +def : ReadAdvance<R52Read_ISS, 0>; +def : ReadAdvance<R52Read_EX1, 1>; +def : ReadAdvance<R52Read_EX2, 2>; +def : ReadAdvance<R52Read_F0, 0>; +def : ReadAdvance<R52Read_F1, 1>; +def : ReadAdvance<R52Read_F2, 2>; + + +// Cortex-R52 specific SchedWrites for use with InstRW +def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; } +def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; // not pipelined +} +def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } +def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } +def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; } +def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; } +def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; } +def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; } +def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; } + +def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } +def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } + +def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } +def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { + let Latency = 4; +} +def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; } +def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { + let Latency = 5; +} +def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; } +def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { + let Latency = 6; +} +def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; } +def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> { + let Latency = 6; +} +def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> { + let Latency = 11; // as it is internally two insns (MUL then ADD) +} +def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL, + R52UnitFPALU, R52UnitFPALU]> { + let Latency = 11; +} + +def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } +def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } + +def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> { + let Latency = 7; // FP div takes fixed #cycles + let ResourceCycles = [7]; // is not pipelined + } +def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> { + let Latency = 17; + let ResourceCycles = [17]; +} + + +//===----------------------------------------------------------------------===// +// Subtarget-specific - map operands to SchedReadWrites + +def : InstRW<[WriteALU], (instrs COPY)>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], + (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16", + "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>; + +def : InstRW<[R52WriteALU_EX1, R52Read_ISS], + (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi", + "t2MOVi", "t2MOV_ga_dyn")>; +def : InstRW<[R52WriteALU_EX2, R52Read_EX1], + (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>; +def : InstRW<[R52WriteLd,R52Read_ISS], + (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>; + +def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS], + (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI", + "(t|t2)UBFX", "(t|t2)SBFX")>; + +// Saturating arithmetic +def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1], + (instregex "QADD", "QSUB", 
"QDADD", "QDSUB", "SSAT", "SSAT16", "USAT", + "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX", + "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD", + "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT", + "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX", + "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>; + +// Parallel arithmetic +def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], + (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX", + "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8", + "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8", + "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>; + +// Flag setting. +def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], + (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX", + "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", + "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16", + "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16", + "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX", + "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>; + +// Sum of Absolute Difference +def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS], + (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >; + +// Integer Multiply +def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS], + (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT", + "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL", + "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT", + "t2SMULWB", "t2SMULWT", "t2SMUSD")>; + +// Multiply Accumulate +// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs). +// The store pipeline is used partly for 64-bit operations. +def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS], + (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR", + "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR", + "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX", + "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX", + "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT", + "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT", + "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX", + "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$", + "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT", + "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX", + "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", + "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", + "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>; + +def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], + (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + +// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles. 
+// However, that's non-trivial to specify, so we keep it uniform +def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], + (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)", + "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX", + "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)", + "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$", + "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$", + "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>; +def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS], + (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)", + "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)", + "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T", + "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)", + "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T", + "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>; +def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri", + "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri", + "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN", + "t2ORRri", "t2RSBS?ri", "t2SBCri")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr", + "ANDS?rr", "BICS?rr", "CRC*", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr", + "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi", + "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi", + "t2AD(|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS], + (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr", + "ORRrsrr", "RSBrsr", "RSCrsr", "SBCrsr")>; + +def : InstRW<[R52WriteALU_EX1], + (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>; + +def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>; +def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS], + (instregex "ASRr", "RORS?r", "LSR", "LSL")>; + +def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>; +def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>; +def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>; +def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>; + +def : InstRW<[R52WriteALU_EX2, R52Read_ISS], + (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>; + +def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>; + +def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>; +def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; + +//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>; +//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>; +//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>; + +//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>; +//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>; +//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", 
"STRB?_POST_REG")>; + + +// Integer Load, Multiple. +foreach Lat = 3-25 in { + def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { + let Latency = Lat; + } + def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> { + let Latency = Lat; + let NumMicroOps = 0; + } +} +foreach NAddr = 1-16 in { + def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>; +} +def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; } +def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>; +def R52WriteILDM : SchedWriteVariant<[ + SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>, + + SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy]>, + SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy]>, + + SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy]>, + SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy]>, + + SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy]>, + SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy]>, + + SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy]>, + SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy]>, + + SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy]>, + SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy, R52WriteILDM15Cy]>, + + SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy, R52WriteILDM15Cy, + R52WriteILDM16Cy]>, + SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy, R52WriteILDM15Cy, + R52WriteILDM16Cy, R52WriteILDM17Cy]>, + + SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy, R52WriteILDM15Cy, + R52WriteILDM16Cy, R52WriteILDM17Cy, + R52WriteILDM18Cy]>, + SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6Cy, R52WriteILDM7Cy, + R52WriteILDM8Cy, R52WriteILDM9Cy, + R52WriteILDM10Cy, R52WriteILDM11Cy, + R52WriteILDM12Cy, R52WriteILDM13Cy, + R52WriteILDM14Cy, R52WriteILDM15Cy, + R52WriteILDM16Cy, R52WriteILDM17Cy, + R52WriteILDM18Cy, R52WriteILDM19Cy]>, + +// Unknown number of registers, just use 
resources for two registers. + SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy, + R52WriteILDM6CyNo, R52WriteILDM7CyNo, + R52WriteILDM8CyNo, R52WriteILDM9CyNo, + R52WriteILDM10CyNo, R52WriteILDM11CyNo, + R52WriteILDM12CyNo, R52WriteILDM13CyNo, + R52WriteILDM14CyNo, R52WriteILDM15CyNo, + R52WriteILDM16CyNo, R52WriteILDM17CyNo, + R52WriteILDM18Cy, R52WriteILDM19Cy]> +]> { let Variadic=1; } + +// Integer Store, Multiple +def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> { + let Latency = 4; + let NumMicroOps = 2; +} +foreach NumAddr = 1-16 in { + def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>; +} +def R52WriteISTM : SchedWriteVariant<[ + SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>, + SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>, + SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>, + SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>, + SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>, + SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>, + SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>, + SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>, + SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>, + SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>, + SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>, + SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>, + SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>, + SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>, + SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>, + // Unknown number of registers, just use resources for two registers. + SchedVar<NoSchedPred, [R52WriteISTM2]> +]>; + +def : InstRW<[R52WriteILDM, R52Read_ISS], + (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$", + "(t|sys)LDM(IA|DA|DB|IB)$")>; +def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], + (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>; +def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], + (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>; + +// Integer Store, Single Element +def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2], + (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS", + "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$", + "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>; + +def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], + (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)", + "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)", + "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)", + "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>; + +// Integer Store, Dual +def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2], + (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>; +def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], + (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>; + +def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2], + (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>; +def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2], + (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", + "PUSH", "tPUSH")>; + +// LDRLIT pseudo instructions; they expand to LDR + PICADD +def : InstRW<[R52WriteLd], + (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>; +// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR +def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>; + + + +//===----------------------------------------------------------------------===// +// VFP, Floating Point Support +def : InstRW<[R52WriteFPALU_F5, R52Read_F1,
R52Read_F1], (instregex "VABD(fd|hd)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>; + +def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>; + +def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>; + +def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>; +def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>; + +def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], + (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>; + +def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; +def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; + + +//===----------------------------------------------------------------------===// +// Neon Support + +// vector multiple load stores +foreach NumAddr = 1-16 in { + def R52LMAddrPred#NumAddr : + SchedPredicate<"MI->getNumOperands() == "#NumAddr>; +} +foreach Lat = 1-32 in { + def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { + let Latency = Lat; + } +} +foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue + def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> { + let Latency = 0; + let NumMicroOps = Num; + let ResourceCycles = [Num]; + } +} +def R52WriteVLDM : SchedWriteVariant<[ + // 1 D reg + SchedVar<R52LMAddrPred1, [R52WriteLM5Cy, + R52ReserveLd5Cy]>, + SchedVar<R52LMAddrPred2, [R52WriteLM5Cy, + R52ReserveLd5Cy]>, + + // 2 D reg + SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy, + R52ReserveLd6Cy]>, + SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy, + R52ReserveLd6Cy]>, + + // 3 D reg + SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, + R52ReserveLd4Cy]>, + SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, + R52ReserveLd7Cy]>, + + // 4 D reg + SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52ReserveLd8Cy]>, + SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52ReserveLd8Cy]>, + + // 5 D reg + SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, + R52ReserveLd9Cy]>, + SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, + R52ReserveLd9Cy]>, + + // 6 D reg + SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52ReserveLd10Cy]>, + SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52ReserveLd10Cy]>, + + // 7 D reg + SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52WriteLM11Cy, + R52ReserveLd11Cy]>, + SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52WriteLM11Cy, + R52ReserveLd11Cy]>, + + // 8 D reg + SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, 
R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52WriteLM11Cy, R52WriteLM12Cy, + R52ReserveLd12Cy]>, + SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52WriteLM11Cy, R52WriteLM12Cy, + R52ReserveLd12Cy]>, + // unknown number of reg. + SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy, + R52WriteLM7Cy, R52WriteLM8Cy, + R52WriteLM9Cy, R52WriteLM10Cy, + R52WriteLM11Cy, R52WriteLM12Cy, + R52ReserveLd5Cy]> +]> { let Variadic=1;} + +// variable stores. Cannot dual-issue +def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1]; +} +def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [2]; +} +def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> { + let Latency = 7; + let NumMicroOps = 6; + let ResourceCycles = [3]; +} +def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> { + let Latency = 8; + let NumMicroOps = 8; + let ResourceCycles = [4]; +} +def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> { + let Latency = 9; + let NumMicroOps = 10; + let ResourceCycles = [5]; +} +def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> { + let Latency = 10; + let NumMicroOps = 12; + let ResourceCycles = [6]; +} +def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> { + let Latency = 11; + let NumMicroOps = 14; + let ResourceCycles = [7]; +} +def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> { + let Latency = 12; + let NumMicroOps = 16; + let ResourceCycles = [8]; +} +def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> { + let Latency = 13; + let NumMicroOps = 18; + let ResourceCycles = [9]; +} +def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> { + let Latency = 14; + let NumMicroOps = 20; + let ResourceCycles = [10]; +} +def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> { + let Latency = 15; + let NumMicroOps = 22; + let ResourceCycles = [11]; +} + +def R52WriteSTM : SchedWriteVariant<[ + SchedVar<R52LMAddrPred1, [R52WriteSTM5]>, + SchedVar<R52LMAddrPred2, [R52WriteSTM5]>, + SchedVar<R52LMAddrPred3, [R52WriteSTM6]>, + SchedVar<R52LMAddrPred4, [R52WriteSTM6]>, + SchedVar<R52LMAddrPred5, [R52WriteSTM7]>, + SchedVar<R52LMAddrPred6, [R52WriteSTM7]>, + SchedVar<R52LMAddrPred7, [R52WriteSTM8]>, + SchedVar<R52LMAddrPred8, [R52WriteSTM8]>, + SchedVar<R52LMAddrPred9, [R52WriteSTM9]>, + SchedVar<R52LMAddrPred10, [R52WriteSTM9]>, + SchedVar<R52LMAddrPred11, [R52WriteSTM10]>, + SchedVar<R52LMAddrPred12, [R52WriteSTM10]>, + SchedVar<R52LMAddrPred13, [R52WriteSTM11]>, + SchedVar<R52LMAddrPred14, [R52WriteSTM11]>, + SchedVar<R52LMAddrPred15, [R52WriteSTM12]>, + SchedVar<R52LMAddrPred16, [R52WriteSTM12]>, + // unknown number of registers, just use resources for two + SchedVar<NoSchedPred, [R52WriteSTM6]> +]>; + +// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with +// another instruction in slot-1, but only in the last issue. 
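// (Editorial aside, not part of the committed patch: the VLDn/VSTn writes
// defined below all follow one pattern. An n-register access holds R52UnitLd
// for n cycles (ResourceCycles = [n]), expands to 2n - 1 micro-ops, and
// completes with latency 4 + n. As a hedged sketch, a two-register variant
// would look like the hypothetical
//   def HypotheticalVLD2 : SchedWriteRes<[R52UnitLd]> {
//     let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2];
//   }
// which matches R52WriteVLD2Mem below.)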
+def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;} +def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [2]; +} +def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [3]; +} +def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 8; + let NumMicroOps = 7; + let ResourceCycles = [4]; +} +def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [2]; +} +def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [3]; +} +def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 8; + let NumMicroOps = 7; + let ResourceCycles = [4]; +} +def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> { + let Latency = 9; + let NumMicroOps = 9; + let ResourceCycles = [5]; +} + + +def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>; + +def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>; +def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>; +def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>; + +def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>; + +def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], + (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], + (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2], + (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>; + +def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], + (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>; + +def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], + (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], + (instregex "VCVT", "VSITO", "VUITO", "VTO")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>; + +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], 
(instregex "VEXTd(8|16|32)", "VSEL")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>; + +def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>; +def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>; + +def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>; +def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>; + +def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; +def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>; +def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>; +def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>; +def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>; +def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2], + (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2], + (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>; +def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>; +def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], + (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>; +def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; + +//--- +// VLDx. 
Vector Loads +//--- +// 1-element structure load +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>; +def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>; +def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>; + +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>; + +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>; +def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>; +def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>; + +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>; + +// 2-element structure load +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>; + +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>; +def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>; + +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>; + +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>; + +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>; +def : InstRW<[R52WriteVLD1Mem, 
R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>; +def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>; + +// 3-element structure load +def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>; +def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; + +// 4-element structure load +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + + +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; + +//--- +// VSTx. 
Vector Stores +//--- +// 1-element structure store +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>; +def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>; +def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>; + +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>; +def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>; +def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>; + +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>; + +// 2-element structure store +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>; +def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>; + +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>; +def : 
InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>; +def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>; + +// 3-element structure store +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>; + +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; + +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>; + +// 4-element structure store +def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>; +def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>; +def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>; +def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>; + +def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>; +def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex 
"VST4LNd(8|16|32)_UPD")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>; +def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>; + +} // R52 SchedModel diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 1d7eef9ddcfd..e2df0bddd0d1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; @@ -58,8 +59,7 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", - "Allow IT blocks based on ARMv7"), - clEnumValEnd)); + "Allow IT blocks based on ARMv7"))); /// ForceFastISel - Use the fast-isel, even for subtargets where it is not /// currently supported (for testing only). @@ -76,6 +76,11 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } +/// EnableExecuteOnly - Enables the generation of execute-only code on supported +/// targets +static cl::opt<bool> +EnableExecuteOnly("arm-execute-only"); + ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, StringRef FS) { ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); @@ -89,8 +94,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), - TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), + GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle), + TargetTriple(TT), Options(TM.Options), TM(TM), + FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. InstrInfo(isThumb1Only() @@ -98,7 +104,32 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : !isThumb() ? 
(ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM, *this) {} + TLInfo(TM, *this), GISel() {} + +const CallLowering *ARMSubtarget::getCallLowering() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getCallLowering(); +} + +const InstructionSelector *ARMSubtarget::getInstructionSelector() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getInstructionSelector(); +} + +const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getLegalizerInfo(); +} + +const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getRegBankInfo(); +} + +bool ARMSubtarget::isXRaySupported() const { + // We don't currently support Thumb, but Windows requires Thumb. + return hasV6Ops() && hasARMOps() && !isTargetWindows(); +} void ARMSubtarget::initializeEnvironment() { // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this @@ -117,10 +148,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isTargetDarwin()) { StringRef ArchName = TargetTriple.getArchName(); - if (ArchName.endswith("v7s")) + unsigned ArchKind = llvm::ARM::parseArch(ArchName); + if (ArchKind == llvm::ARM::AK_ARMV7S) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; - else if (ArchName.endswith("v7k")) + else if (ArchKind == llvm::ARM::AK_ARMV7K) // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. // ARMv7k does not use SjLj exception handling. CPUString = "cortex-a7"; @@ -143,6 +175,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Assert this for now to make the change obvious. assert(hasV6T2Ops() || !hasThumb2()); + // Execute-only support requires movt support + if (genExecuteOnly()) + assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target"); + // Keep a pointer to static instruction cost data for the specified CPU. SchedModel = getSchedModelForCPU(CPUString); @@ -199,6 +235,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; + if (isRWPI()) + ReserveR9 = true; + // FIXME: Teach TableGen to deal with these instead of doing it manually here. switch (ARMProcFamily) { case Others: @@ -234,6 +273,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexR7: case CortexM3: case ExynosM1: + case CortexR52: break; case Krait: PreISelOperandLatencyAdjustment = 1; @@ -261,6 +301,15 @@ bool ARMSubtarget::isAAPCS16_ABI() const { return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } +bool ARMSubtarget::isROPI() const { + return TM.getRelocationModel() == Reloc::ROPI || + TM.getRelocationModel() == Reloc::ROPI_RWPI; +} +bool ARMSubtarget::isRWPI() const { + return TM.getRelocationModel() == Reloc::RWPI || + TM.getRelocationModel() == Reloc::ROPI_RWPI; +} + bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) return true; // 32 bit macho has no relocation for a-b if a is undefined, even if b is in // the section that is being relocated. This means we have to use a load even // for GVs that are known to be local to the dso.
- if (isTargetDarwin() && TM.isPositionIndependent() && + if (isTargetMachO() && TM.isPositionIndependent() && (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) return true; @@ -300,9 +349,7 @@ bool ARMSubtarget::enablePostRAScheduler() const { return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpand() const { - return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps()); -} +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with @@ -316,7 +363,7 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // immediates as it is inherently position independent, and may be out of // range otherwise. return !NoMovt && hasV8MBaselineOps() && - (isTargetWindows() || !MF.getFunction()->optForMinSize()); + (isTargetWindows() || !MF.getFunction()->optForMinSize() || genExecuteOnly()); } bool ARMSubtarget::useFastISel() const { diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 910de0e1e72d..8c8218d0f432 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -25,6 +25,7 @@ #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -43,7 +44,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexM3, + CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3, CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73, Krait, Swift, ExynosM1 }; @@ -53,7 +54,8 @@ protected: enum ARMArchEnum { ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, - ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline + ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline, + ARMv8r }; public: @@ -234,6 +236,9 @@ protected: /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing = false; + /// HasFPAO - if true, the processor does positive address offset computation faster + bool HasFPAO = false; + /// If true, if-conversion may decide to leave some instructions unpredicated. bool IsProfitableToUnpredicate = false; @@ -296,6 +301,9 @@ protected: /// Generate calls via indirect call instructions. bool GenLongCalls = false; + /// Generate code that does not contain data access to code sections. + bool GenExecuteOnly = false; + /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath = false; @@ -346,6 +354,9 @@ public: ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle); + /// This object will take ownership of \p GISelAccessor. + void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); } + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const { @@ -375,6 +386,11 @@ public: return &InstrInfo->getRegisterInfo(); } + const CallLowering *getCallLowering() const override; + const InstructionSelector *getInstructionSelector() const override; + const LegalizerInfo *getLegalizerInfo() const override; + const RegisterBankInfo *getRegBankInfo() const override; + private: ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. @@ -383,6 +399,11 @@ private: std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; + /// Gather the accessor points to GlobalISel-related APIs. + /// This is used to avoid ifndefs spreading around while GISel is + /// an optional library. + std::unique_ptr<GISelAccessor> GISel; + void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS); @@ -452,6 +473,7 @@ public: bool hasTrustZone() const { return HasTrustZone; } bool has8MSecExt() const { return Has8MSecExt; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasFPAO() const { return HasFPAO; } bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } bool hasSlowVDUP32() const { return HasSlowVDUP32; } @@ -475,6 +497,7 @@ public: bool useNaClTrap() const { return UseNaClTrap; } bool useSjLjEH() const { return UseSjLjEH; } bool genLongCalls() const { return GenLongCalls; } + bool genExecuteOnly() const { return GenExecuteOnly; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } @@ -540,10 +563,15 @@ public: } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } + virtual bool isXRaySupported() const override; + bool isAPCS_ABI() const; bool isAAPCS_ABI() const; bool isAAPCS16_ABI() const; + bool isROPI() const; + bool isRWPI() const; + bool useSoftFloat() const { return UseSoftFloat; } bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } @@ -557,11 +585,17 @@ public: return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } + bool useR7AsFramePointer() const { + return isTargetDarwin() || (!isTargetWindows() && isThumb()); + } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent - /// to lr. - bool splitFramePushPop() const { - return isTargetMachO(); + /// to lr. This is always required on Thumb1-only targets, as the push and + /// pop instructions can't access the high registers. 
+ bool splitFramePushPop(const MachineFunction &MF) const { + return (useR7AsFramePointer() && + MF.getTarget().Options.DisableFramePointerElim(MF)) || + isThumb1Only(); } bool useStride4VFPs(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index dc730a675bef..70c9567d99f8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -10,11 +10,19 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetMachine.h" #include "ARM.h" +#include "ARMCallLowering.h" #include "ARMFrameLowering.h" -#include "ARMTargetMachine.h" +#include "ARMInstructionSelector.h" +#include "ARMLegalizerInfo.h" +#include "ARMRegisterBankInfo.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" @@ -22,6 +30,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" @@ -50,12 +59,13 @@ EnableGlobalMerge("arm-global-merge", cl::Hidden, extern "C" void LLVMInitializeARMTarget() { // Register the target. - RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); - RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget); - RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget); - RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); + RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget()); + RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget()); + RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget()); + RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeGlobalISel(Registry); initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); } @@ -84,11 +94,13 @@ computeTargetABI(const Triple &TT, StringRef CPU, ARMBaseTargetMachine::ARMABI TargetABI = ARMBaseTargetMachine::ARM_ABI_UNKNOWN; + unsigned ArchKind = llvm::ARM::parseCPUArch(CPU); + StringRef ArchName = llvm::ARM::getArchName(ArchKind); // FIXME: This is duplicated code from the front end and should be unified. if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == llvm::Triple::EABI || (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || - CPU.startswith("cortex-m")) { + llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; } else if (TT.isWatchABI()) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; @@ -184,6 +196,10 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, // Default relocation model on Darwin is PIC. return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; + if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI) + assert(TT.isOSBinFormatELF() && + "ROPI/RWPI currently only supported for ELF"); + // DynamicNoPIC is only used on darwin. 
if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) return Reloc::Static; @@ -224,6 +240,29 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, ARMBaseTargetMachine::~ARMBaseTargetMachine() {} +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { +struct ARMGISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; +} // End anonymous namespace. +#endif + const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -255,6 +294,24 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); + +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering())); + GISel->Legalizer.reset(new ARMLegalizerInfo()); + + auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? 
+ GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI)); + + GISel->RegBankInfo.reset(RBI); +#endif + I->setGISelAccessor(*GISel); } return I.get(); } @@ -346,6 +403,12 @@ public: void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; +#ifdef LLVM_BUILD_GLOBAL_ISEL + bool addIRTranslator() override; + bool addLegalizeMachineIR() override; + bool addRegBankSelect() override; + bool addGlobalInstructionSelect() override; +#endif void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; @@ -406,6 +469,28 @@ bool ARMPassConfig::addInstSelector() { return false; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +bool ARMPassConfig::addIRTranslator() { + addPass(new IRTranslator()); + return false; +} + +bool ARMPassConfig::addLegalizeMachineIR() { + addPass(new Legalizer()); + return false; +} + +bool ARMPassConfig::addRegBankSelect() { + addPass(new RegBankSelect()); + return false; +} + +bool ARMPassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} +#endif + void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { addPass(createMLxExpansionPass()); @@ -436,8 +521,8 @@ void ARMPassConfig::addPreSched2() { return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); })); - addPass(createIfConverter([this](const Function &F) { - return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only(); + addPass(createIfConverter([](const MachineFunction &MF) { + return !MF.getSubtarget<ARMSubtarget>().isThumb1Only(); })); } addPass(createThumb2ITBlockPass()); @@ -447,8 +532,8 @@ void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); // Constant island pass works on unbundled instructions. - addPass(createUnpackMachineBundles([this](const Function &F) { - return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2(); + addPass(createUnpackMachineBundles([](const MachineFunction &MF) { + return MF.getSubtarget<ARMSubtarget>().isThumb2(); })); // Don't optimize barriers at -O0.
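A note on the GISelAccessor indirection used in the two hunks above: the subtarget stores only the base accessor type, whose getters return null, and the LLVM_BUILD_GLOBAL_ISEL branch swaps in an accessor that owns the real objects. The standalone sketch below illustrates that pattern with made-up stand-in types (*Stub names are illustrative, not the LLVM classes):

#include <memory>

// Stand-in for one of the optional GlobalISel interfaces (illustrative only).
struct CallLoweringStub {};

// Base accessor: what a build without the optional library installs.
// Every getter returns null, so callers can probe for availability.
struct AccessorBase {
  virtual ~AccessorBase() = default;
  virtual const CallLoweringStub *getCallLowering() const { return nullptr; }
};

// "Actual" accessor: owns the real objects and hands out raw pointers,
// mirroring the role of ARMGISelActualAccessor in the patch.
struct AccessorImpl : AccessorBase {
  std::unique_ptr<CallLoweringStub> CallLoweringInfo;
  const CallLoweringStub *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
};

// The owner (the subtarget, in the patch) stores the base type, so the
// optional classes never appear outside one conditionally compiled block.
struct OwnerStub {
  std::unique_ptr<AccessorBase> Accessor = std::make_unique<AccessorBase>();
  const CallLoweringStub *getCallLowering() const {
    return Accessor->getCallLowering();
  }
};

int main() {
  OwnerStub O; // default build: accessor present, optional subsystem absent
  return O.getCallLowering() == nullptr ? 0 : 1;
}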
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index eaed5cc68750..625c4280e1a6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -27,8 +27,10 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI == - ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; + const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM); + bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; + genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); + TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); @@ -38,19 +40,28 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); + + // Make code section unreadable when in execute-only mode + if (genExecuteOnly) { + unsigned Type = ELF::SHT_PROGBITS; + unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE; + // Since we cannot modify flags for an existing section, we create a new + // section with the right flags, and use 0 as the unique ID for + // execute-only text + TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U); + } } const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { + const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM) return TargetLoweringObjectFileELF::getTTypeGlobalReference( - GV, Encoding, Mang, TM, MMI, Streamer); + GV, Encoding, TM, MMI, Streamer); assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); - return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), + return MCSymbolRefExpr::create(TM.getSymbol(GV), MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); } @@ -59,3 +70,23 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const { return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, getContext()); } + +MCSection * +ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO, + SectionKind SK, const TargetMachine &TM) const { + // Set execute-only access for the explicit section + if (genExecuteOnly && SK.isText()) + SK = SectionKind::getExecuteOnly(); + + return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM); +} + +MCSection * +ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO, + SectionKind SK, const TargetMachine &TM) const { + // Place the global in the execute-only text section + if (genExecuteOnly && SK.isText()) + SK = SectionKind::getExecuteOnly(); + + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index b1db201cb30d..24e755ddac27 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -18,6 +18,7 @@ class MCContext; class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { + mutable bool genExecuteOnly = false; protected: const MCSection *AttributesSection; public: 
@@ -28,14 +29,20 @@ public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - const MCExpr * - getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, - Mangler &Mang, const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const override; + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, + unsigned Encoding, + const TargetMachine &TM, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; /// \brief Describe a TLS variable address within debug info. const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; + + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 13c5dc61acd9..10e6297ef1ed 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -41,7 +41,7 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Thumb1. if (SImmVal >= 0 && SImmVal < 256) return 1; - if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) return 2; // Load from constantpool. return 3; @@ -69,6 +69,25 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Idx == 1) return 0; + if (Opcode == Instruction::And) + // Conversion to BIC is free, and means we can use ~Imm instead. + return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty)); + + if (Opcode == Instruction::Add) + // Conversion to SUB is free, and means we can use -Imm instead. + return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty)); + + if (Opcode == Instruction::ICmp && Imm.isNegative() && + Ty->getIntegerBitWidth() == 32) { + int64_t NegImm = -Imm.getSExtValue(); + if (ST->isThumb2() && NegImm < 1<<12) + // icmp X, #-C -> cmn X, #C + return 0; + if (ST->isThumb() && NegImm < 1<<8) + // icmp X, #-C -> adds X, #C + return 0; + } + return getIntImmCost(Imm, Ty); } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index a0ca9e648002..d83228afb0ab 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -45,13 +45,6 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - // Provide value semantics. MSVC requires that we spell all of these out. 
- ARMTTIImpl(const ARMTTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} - ARMTTIImpl(ARMTTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), - TLI(std::move(Arg.TLI)) {} - bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced @@ -128,6 +121,16 @@ public: int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace); + + bool shouldBuildLookupTablesForConstant(Constant *C) const { + // In the ROPI and RWPI relocation models we can't have pointers to global + // variables or functions in constant data, so don't convert switches to + // lookup tables if any of the values would need relocation. + if (ST->isROPI() || ST->isRWPI()) + return !C->needsRelocation(); + + return true; + } /// @} }; diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f5de8a3cd25e..c243a2d35979 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" #include "llvm/Support/COFF.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" @@ -52,6 +53,20 @@ using namespace llvm; namespace { +enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly }; + +static cl::opt<ImplicitItModeTy> ImplicitItMode( + "arm-implicit-it", cl::init(ImplicitItModeTy::ARMOnly), + cl::desc("Allow conditional instructions outside of an IT block"), + cl::values(clEnumValN(ImplicitItModeTy::Always, "always", + "Accept in both ISAs, emit implicit ITs in Thumb"), + clEnumValN(ImplicitItModeTy::Never, "never", + "Warn in ARM, reject in Thumb"), + clEnumValN(ImplicitItModeTy::ARMOnly, "arm", + "Accept in ARM, reject in Thumb"), + clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb", + "Warn in ARM, emit implicit ITs in Thumb"))); + class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; @@ -145,6 +160,16 @@ class ARMAsmParser : public MCTargetAsmParser { bool NextSymbolIsThumb; + bool useImplicitITThumb() const { + return ImplicitItMode == ImplicitItModeTy::Always || + ImplicitItMode == ImplicitItModeTy::ThumbOnly; + } + + bool useImplicitITARM() const { + return ImplicitItMode == ImplicitItModeTy::Always || + ImplicitItMode == ImplicitItModeTy::ARMOnly; + } + struct { ARMCC::CondCodes Cond; // Condition for IT block. unsigned Mask:4; // Condition mask for instructions. @@ -153,40 +178,174 @@ class ARMAsmParser : public MCTargetAsmParser { // '0' inverse of condition (else). // Count of instructions in IT block is // 4 - trailingzeroes(mask) - - bool FirstCond; // Explicit flag for when we're parsing the - // First instruction in the IT block. It's - // implied in the mask, so needs special - // handling. + // Note that this does not have the same encoding + // as in the IT instruction, which also depends + // on the low bit of the condition code. unsigned CurPosition; // Current position in parsing of IT - // block. In range [0,3]. Initialized - // according to count of instructions in block. - // ~0U if no active IT block. + // block. In range [0,4], with 0 being the IT + // instruction itself. Initialized according to + // count of instructions in block.
~0U if no + active IT block. + + bool IsExplicit; // true - The IT instruction was present in the + // input, we should not modify it. + // false - The IT instruction was added + // implicitly, we can extend it if that + // would be legal. } ITState; + + llvm::SmallVector<MCInst, 4> PendingConditionalInsts; + + void flushPendingInstructions(MCStreamer &Out) override { + if (!inImplicitITBlock()) { + assert(PendingConditionalInsts.size() == 0); + return; + } + + // Emit the IT instruction + unsigned Mask = getITMaskEncoding(); + MCInst ITInst; + ITInst.setOpcode(ARM::t2IT); + ITInst.addOperand(MCOperand::createImm(ITState.Cond)); + ITInst.addOperand(MCOperand::createImm(Mask)); + Out.EmitInstruction(ITInst, getSTI()); + + // Emit the conditional instructions + assert(PendingConditionalInsts.size() <= 4); + for (const MCInst &Inst : PendingConditionalInsts) { + Out.EmitInstruction(Inst, getSTI()); + } + PendingConditionalInsts.clear(); + + // Clear the IT state + ITState.Mask = 0; + ITState.CurPosition = ~0U; + } + bool inITBlock() { return ITState.CurPosition != ~0U; } + bool inExplicitITBlock() { return inITBlock() && ITState.IsExplicit; } + bool inImplicitITBlock() { return inITBlock() && !ITState.IsExplicit; } bool lastInITBlock() { return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask); } void forwardITPosition() { if (!inITBlock()) return; // Move to the next instruction in the IT block, if there is one. If not, - // mark the block as done. + // mark the block as done, except for implicit IT blocks, which we leave + // open until we find an instruction that can't be added to it. unsigned TZ = countTrailingZeros(ITState.Mask); - if (++ITState.CurPosition == 5 - TZ) + if (++ITState.CurPosition == 5 - TZ && ITState.IsExplicit) ITState.CurPosition = ~0U; // Done with the IT block after this. } - void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return getParser().Note(L, Msg, Ranges); + // Rewind the state of the current IT block, removing the last slot from it. + void rewindImplicitITPosition() { + assert(inImplicitITBlock()); + assert(ITState.CurPosition > 1); + ITState.CurPosition--; + unsigned TZ = countTrailingZeros(ITState.Mask); + unsigned NewMask = 0; + NewMask |= ITState.Mask & (0xC << TZ); + NewMask |= 0x2 << TZ; + ITState.Mask = NewMask; + } + + // Rewind the state of the current IT block, removing the last slot from it. + // If we were at the first slot, this closes the IT block. + void discardImplicitITBlock() { + assert(inImplicitITBlock()); + assert(ITState.CurPosition == 1); + ITState.CurPosition = ~0U; + return; } - bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = None) { - return getParser().Warning(L, Msg, Ranges); + + // Get the encoding of the IT mask, as it will appear in an IT instruction. + unsigned getITMaskEncoding() { + assert(inITBlock()); + unsigned Mask = ITState.Mask; + unsigned TZ = countTrailingZeros(Mask); + if ((ITState.Cond & 1) == 0) { + assert(Mask && TZ <= 3 && "illegal IT mask value!"); + Mask ^= (0xE << TZ) & 0xF; + } + return Mask; } - bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = None) { - return getParser().Error(L, Msg, Ranges); + + // Get the condition code corresponding to the current IT block slot. + ARMCC::CondCodes currentITCond() { + unsigned MaskBit; + if (ITState.CurPosition == 1) + MaskBit = 1; + else + MaskBit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1; + + return MaskBit ?
ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond); + } + + // Invert the condition of the current IT block slot without changing any + // other slots in the same block. + void invertCurrentITCondition() { + if (ITState.CurPosition == 1) { + ITState.Cond = ARMCC::getOppositeCondition(ITState.Cond); + } else { + ITState.Mask ^= 1 << (5 - ITState.CurPosition); + } + } + + // Returns true if the current IT block is full (all 4 slots used). + bool isITBlockFull() { + return inITBlock() && (ITState.Mask & 1); + } + + // Extend the current implicit IT block to have one more slot with the given + // condition code. + void extendImplicitITBlock(ARMCC::CondCodes Cond) { + assert(inImplicitITBlock()); + assert(!isITBlockFull()); + assert(Cond == ITState.Cond || + Cond == ARMCC::getOppositeCondition(ITState.Cond)); + unsigned TZ = countTrailingZeros(ITState.Mask); + unsigned NewMask = 0; + // Keep any existing condition bits. + NewMask |= ITState.Mask & (0xE << TZ); + // Insert the new condition bit. + NewMask |= (Cond == ITState.Cond) << TZ; + // Move the trailing 1 down one bit. + NewMask |= 1 << (TZ - 1); + ITState.Mask = NewMask; + } + + // Create a new implicit IT block with a dummy condition code. + void startImplicitITBlock() { + assert(!inITBlock()); + ITState.Cond = ARMCC::AL; + ITState.Mask = 8; + ITState.CurPosition = 1; + ITState.IsExplicit = false; + return; + } + + // Create a new explicit IT block with the given condition and mask. The mask + // should be in the parsed format, with a 1 implying 't', regardless of the + // low bit of the condition. + void startExplicitITBlock(ARMCC::CondCodes Cond, unsigned Mask) { + assert(!inITBlock()); + ITState.Cond = Cond; + ITState.Mask = Mask; + ITState.CurPosition = 0; + ITState.IsExplicit = true; + return; + } + + void Note(SMLoc L, const Twine &Msg, SMRange Range = None) { + return getParser().Note(L, Msg, Range); + } + bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) { + return getParser().Warning(L, Msg, Range); + } + bool Error(SMLoc L, const Twine &Msg, SMRange Range = None) { + return getParser().Error(L, Msg, Range); } bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, @@ -355,6 +514,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out); bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); + bool isITBlockTerminator(MCInst &Inst) const; public: enum ARMMatchResultTy { @@ -363,6 +523,7 @@ public: Match_RequiresV6, Match_RequiresThumb2, Match_RequiresV8, + Match_RequiresFlagSetting, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "ARMGenAsmMatcher.inc" @@ -399,6 +560,9 @@ public: OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; + unsigned MatchInstruction(OperandVector &Operands, MCInst &Inst, + uint64_t &ErrorInfo, bool MatchingInlineAsm, + bool &EmitInITBlock, MCStreamer &Out); void onLabelParsed(MCSymbol *Symbol) override; }; } // end anonymous namespace @@ -3286,7 +3450,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { } /// parseITCondCode - Try to parse a condition code for an IT instruction. 
-ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseITCondCode(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -3324,7 +3488,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) { /// parseCoprocNumOperand - Try to parse a coprocessor number operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -3347,7 +3511,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { /// parseCoprocRegOperand - Try to parse a coprocessor register operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -3366,7 +3530,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { /// parseCoprocOptionOperand - Try to parse a coprocessor option operand. /// coproc_option : '{' imm0_255 '}' -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -3447,8 +3611,8 @@ static unsigned getDRegFromQReg(unsigned QReg) { /// Parse a register list. bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { MCAsmParser &Parser = getParser(); - assert(Parser.getTok().is(AsmToken::LCurly) && - "Token is not a Left Curly Brace"); + if (Parser.getTok().isNot(AsmToken::LCurly)) + return TokError("Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat '{' token. SMLoc RegLoc = Parser.getTok().getLoc(); @@ -3576,7 +3740,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { } // Helper function to parse the lane index for vector lists. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +OperandMatchResultTy ARMAsmParser:: parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { MCAsmParser &Parser = getParser(); Index = 0; // Always return a defined index value. @@ -3628,7 +3792,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { } // parse a vector register list -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseVectorList(OperandVector &Operands) { MCAsmParser &Parser = getParser(); VectorLaneTy LaneKind; @@ -3880,7 +4044,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) { } /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -3952,7 +4116,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { } /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -4004,7 +4168,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -4039,7 +4203,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { } /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -4192,7 +4356,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { /// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for /// use in the MRS/MSR instructions added to support virtualization. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -4247,7 +4411,7 @@ ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, int High) { MCAsmParser &Parser = getParser(); @@ -4296,7 +4460,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseSetEndImm(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -4326,7 +4490,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { /// lsl #n 'n' in [0,31] /// asr #n 'n' in [1,32] /// n == 32 encoded as n == 0. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseShifterImm(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -4397,7 +4561,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) { /// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family /// of instructions. Legal values are: /// ror #n 'n' in {0, 8, 16, 24} -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseRotImm(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -4444,7 +4608,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseModImm(OperandVector &Operands) { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); @@ -4561,7 +4725,7 @@ ARMAsmParser::parseModImm(OperandVector &Operands) { } } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseBitfield(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -4630,7 +4794,7 @@ ARMAsmParser::parseBitfield(OperandVector &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // Check for a post-index addressing register operand. 
Specifically: // postidx_reg := '+' register {, shift} @@ -4680,7 +4844,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // Check for a post-index addressing register operand. Specifically: // am3offset := '+' register @@ -4833,8 +4997,8 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, bool ARMAsmParser::parseMemory(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S, E; - assert(Parser.getTok().is(AsmToken::LBrac) && - "Token is not a Left Bracket"); + if (Parser.getTok().isNot(AsmToken::LBrac)) + return TokError("Token is not a Left Bracket"); S = Parser.getTok().getLoc(); Parser.Lex(); // Eat left bracket token. @@ -5082,7 +5246,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, } /// parseFPImm - A floating point immediate expression operand. -ARMAsmParser::OperandMatchResultTy +OperandMatchResultTy ARMAsmParser::parseFPImm(OperandVector &Operands) { MCAsmParser &Parser = getParser(); // Anything that can accept a floating point constant as an operand @@ -5131,7 +5295,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc Loc = Tok.getLoc(); if (Tok.is(AsmToken::Real) && isVmovf) { - APFloat RealVal(APFloat::IEEEsingle, Tok.getString()); + APFloat RealVal(APFloat::IEEEsingle(), Tok.getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. IntVal ^= (uint64_t)isNegative << 31; @@ -5259,7 +5423,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return false; } // w/ a ':' after the '#', it's just like a plain ':'. - // FALLTHROUGH + LLVM_FALLTHROUGH; } case AsmToken::Colon: { S = Parser.getTok().getLoc(); @@ -5289,6 +5453,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (getParser().parseExpression(SubExprVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // execute-only: we assume that assembly programmers know what they are + // doing and allow literal pool creation here Operands.push_back(ARMOperand::CreateConstantPoolImm(SubExprVal, S, E)); return false; } @@ -5842,7 +6009,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // In Thumb1, only the branch (B) instruction can be predicated. if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") { - Parser.eatToEndOfStatement(); return Error(NameLoc, "conditional execution not supported in Thumb1"); } @@ -5856,14 +6022,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (Mnemonic == "it") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2); if (ITMask.size() > 3) { - Parser.eatToEndOfStatement(); return Error(Loc, "too many conditions on IT instruction"); } unsigned Mask = 8; for (unsigned i = ITMask.size(); i != 0; --i) { char pos = ITMask[i - 1]; if (pos != 't' && pos != 'e') { - Parser.eatToEndOfStatement(); return Error(Loc, "illegal IT block condition mask '" + ITMask + "'"); } Mask >>= 1; @@ -5889,14 +6053,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // If we had a carry-set on an instruction that can't do that, issue an // error. 
if (!CanAcceptCarrySet && CarrySetting) { - Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' can not set flags, but 's' suffix specified"); } // If we had a predication code on an instruction that can't do that, issue an // error. if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { - Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' is not predicable, but condition code specified"); } @@ -5940,7 +6102,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // For ARM mode generate an error if the .n qualifier is used. if (ExtraToken == ".n" && !isThumb()) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); - Parser.eatToEndOfStatement(); return Error(Loc, "instruction with .n (narrow) qualifier not allowed in " "arm mode"); } @@ -5958,28 +6119,19 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. if (parseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); return true; } - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - + while (parseOptionalToken(AsmToken::Comma)) { // Parse and remember the operand. if (parseOperand(Operands, Mnemonic)) { - Parser.eatToEndOfStatement(); return true; } } } - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement + if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list")) + return true; if (RequireVFPRegisterListCheck) { ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back()); @@ -6043,10 +6195,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Rt2 must be Rt + 1 and Rt must be even. if (Rt + 1 != Rt2 || (Rt & 1)) { - Error(Op2.getStartLoc(), isLoad - ? "destination operands must be sequential" - : "source operands must be sequential"); - return true; + return Error(Op2.getStartLoc(), + isLoad ? "destination operands must be sequential" + : "source operands must be sequential"); } unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID))); @@ -6188,18 +6339,11 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // NOTE: BKPT and HLT instructions have the interesting property of being // allowed in IT blocks, but not being predicable. They just always execute. if (inITBlock() && !instIsBreakpoint(Inst)) { - unsigned Bit = 1; - if (ITState.FirstCond) - ITState.FirstCond = false; - else - Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1; // The instruction must be predicable. if (!MCID.isPredicable()) return Error(Loc, "instructions in IT block must be predicable"); unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm(); - unsigned ITCond = Bit ? ITState.Cond : - ARMCC::getOppositeCondition(ITState.Cond); - if (Cond != ITCond) { + if (Cond != currentITCond()) { // Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc; for (unsigned I = 1; I < Operands.size(); ++I) @@ -6208,14 +6352,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(CondLoc, "incorrect condition in IT block; got '" + StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) + "', but expected '" + - ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'"); + ARMCondCodeToString(ARMCC::CondCodes(currentITCond())) + "'"); } // Check for non-'al' condition codes outside of the IT block. } else if (isThumbTwo() && MCID.isPredicable() && Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() != ARMCC::AL && Inst.getOpcode() != ARM::tBcc && - Inst.getOpcode() != ARM::t2Bcc) + Inst.getOpcode() != ARM::t2Bcc) { return Error(Loc, "predicated instructions must be in IT block"); + } else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() && + Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() != + ARMCC::AL) { + return Warning(Loc, "predicated instructions should be in IT block"); + } const unsigned Opcode = Inst.getOpcode(); switch (Opcode) { @@ -6520,6 +6669,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[Op]->getStartLoc(), "branch target out of range"); break; } + case ARM::tCBZ: + case ARM::tCBNZ: { + if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<6, 1>()) + return Error(Operands[2]->getStartLoc(), "branch target out of range"); + break; + } case ARM::MOVi16: case ARM::t2MOVi16: case ARM::t2MOVTi16: @@ -8639,27 +8794,15 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } case ARM::ITasm: case ARM::t2IT: { - // The mask bits for all but the first condition are represented as - // the low bit of the condition code value implies 't'. We currently - // always have 1 implies 't', so XOR toggle the bits if the low bit - // of the condition code is zero. MCOperand &MO = Inst.getOperand(1); unsigned Mask = MO.getImm(); - unsigned OrigMask = Mask; - unsigned TZ = countTrailingZeros(Mask); - if ((Inst.getOperand(0).getImm() & 1) == 0) { - assert(Mask && TZ <= 3 && "illegal IT mask value!"); - Mask ^= (0xE << TZ) & 0xF; - } - MO.setImm(Mask); + ARMCC::CondCodes Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm()); // Set up the IT block state according to the IT instruction we just // matched. assert(!inITBlock() && "nested IT blocks?!"); - ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm()); - ITState.Mask = OrigMask; // Use the original mask, not the updated one. - ITState.CurPosition = 0; - ITState.FirstCond = true; + startExplicitITBlock(Cond, Mask); + MO.setImm(getITMaskEncoding()); break; } case ARM::t2LSLrr: @@ -8766,7 +8909,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { ; // If we're parsing Thumb1, reject it completely. if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR) - return Match_MnemonicFail; + return Match_RequiresFlagSetting; // If we're parsing Thumb2, which form is legal depends on whether we're // in an IT block. if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR && @@ -8807,6 +8950,132 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { } } +// Returns true if Inst is unpredictable if it is in an IT block, but is not +// the last instruction in the block. +bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + + // All branch & call instructions terminate IT blocks.
+ if (MCID.isTerminator() || MCID.isCall() || MCID.isReturn() || + MCID.isBranch() || MCID.isIndirectBranch()) + return true; + + // Any arithmetic instruction which writes to the PC also terminates the IT + // block. + for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) { + MCOperand &Op = Inst.getOperand(OpIdx); + if (Op.isReg() && Op.getReg() == ARM::PC) + return true; + } + + if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI)) + return true; + + // Instructions with variable operand lists, which write to the variable + // operands. We only care about Thumb instructions here, as ARM instructions + // obviously can't be in an IT block. + switch (Inst.getOpcode()) { + case ARM::t2LDMIA: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB: + case ARM::t2LDMDB_UPD: + if (listContainsReg(Inst, 3, ARM::PC)) + return true; + break; + case ARM::tPOP: + if (listContainsReg(Inst, 2, ARM::PC)) + return true; + break; + } + + return false; +} + +unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, + uint64_t &ErrorInfo, + bool MatchingInlineAsm, + bool &EmitInITBlock, + MCStreamer &Out) { + // If we can't use an implicit IT block here, just match as normal. + if (inExplicitITBlock() || !isThumbTwo() || !useImplicitITThumb()) + return MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + + // Try to match the instruction in an extension of the current IT block (if + // there is one). + if (inImplicitITBlock()) { + extendImplicitITBlock(ITState.Cond); + if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + Match_Success) { + // The match succeeded, but we still have to check that the instruction is + // valid in this implicit IT block. + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + if (MCID.isPredicable()) { + ARMCC::CondCodes InstCond = + (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx()) + .getImm(); + ARMCC::CondCodes ITCond = currentITCond(); + if (InstCond == ITCond) { + EmitInITBlock = true; + return Match_Success; + } else if (InstCond == ARMCC::getOppositeCondition(ITCond)) { + invertCurrentITCondition(); + EmitInITBlock = true; + return Match_Success; + } + } + } + rewindImplicitITPosition(); + } + + // Finish the current IT block, and try to match outside any IT block. + flushPendingInstructions(Out); + unsigned PlainMatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + if (PlainMatchResult == Match_Success) { + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + if (MCID.isPredicable()) { + ARMCC::CondCodes InstCond = + (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx()) + .getImm(); + // Some forms of the branch instruction have their own condition code + // fields, so can be conditionally executed without an IT block. + if (Inst.getOpcode() == ARM::tBcc || Inst.getOpcode() == ARM::t2Bcc) { + EmitInITBlock = false; + return Match_Success; + } + if (InstCond == ARMCC::AL) { + EmitInITBlock = false; + return Match_Success; + } + } else { + EmitInITBlock = false; + return Match_Success; + } + } + + // Try to match in a new IT block. The matcher doesn't check the actual + // condition, so we create an IT block with a dummy condition, and fix it up + // once we know the actual condition.
+ startImplicitITBlock(); + if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + Match_Success) { + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + if (MCID.isPredicable()) { + ITState.Cond = + (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx()) + .getImm(); + EmitInITBlock = true; + return Match_Success; + } + } + discardImplicitITBlock(); + + // If none of these succeed, return the error we got when trying to match + // outside any IT blocks. + EmitInITBlock = false; + return PlainMatchResult; +} + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -8814,9 +9083,11 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; + bool PendConditionalInstruction = false; + + MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, + PendConditionalInstruction, Out); - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); switch (MatchResult) { case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, @@ -8856,7 +9127,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, getSTI()); + if (PendConditionalInstruction) { + PendingConditionalInsts.push_back(Inst); + if (isITBlockFull() || isITBlockTerminator(Inst)) + flushPendingInstructions(Out); + } else { + Out.EmitInstruction(Inst, getSTI()); + } return false; case Match_MissingFeature: { assert(ErrorInfo && "Unknown missing feature!"); @@ -8898,6 +9175,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires Thumb2"); case Match_RequiresV8: return Error(IDLoc, "instruction variant requires ARMv8 or later"); + case Match_RequiresFlagSetting: + return Error(IDLoc, "no flag-preserving variant of this instruction available"); case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; @@ -8958,78 +9237,79 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") - return parseLiteralValues(4, DirectiveID.getLoc()); + parseLiteralValues(4, DirectiveID.getLoc()); else if (IDVal == ".short" || IDVal == ".hword") - return parseLiteralValues(2, DirectiveID.getLoc()); + parseLiteralValues(2, DirectiveID.getLoc()); else if (IDVal == ".thumb") - return parseDirectiveThumb(DirectiveID.getLoc()); + parseDirectiveThumb(DirectiveID.getLoc()); else if (IDVal == ".arm") - return parseDirectiveARM(DirectiveID.getLoc()); + parseDirectiveARM(DirectiveID.getLoc()); else if (IDVal == ".thumb_func") - return parseDirectiveThumbFunc(DirectiveID.getLoc()); + parseDirectiveThumbFunc(DirectiveID.getLoc()); else if (IDVal == ".code") - return parseDirectiveCode(DirectiveID.getLoc()); + parseDirectiveCode(DirectiveID.getLoc()); else if (IDVal == ".syntax") - return parseDirectiveSyntax(DirectiveID.getLoc()); + parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") - return parseDirectiveUnreq(DirectiveID.getLoc()); + parseDirectiveUnreq(DirectiveID.getLoc()); else if (IDVal == ".fnend") - return parseDirectiveFnEnd(DirectiveID.getLoc()); + parseDirectiveFnEnd(DirectiveID.getLoc()); else if (IDVal == ".cantunwind") - return 
parseDirectiveCantUnwind(DirectiveID.getLoc()); + parseDirectiveCantUnwind(DirectiveID.getLoc()); else if (IDVal == ".personality") - return parseDirectivePersonality(DirectiveID.getLoc()); + parseDirectivePersonality(DirectiveID.getLoc()); else if (IDVal == ".handlerdata") - return parseDirectiveHandlerData(DirectiveID.getLoc()); + parseDirectiveHandlerData(DirectiveID.getLoc()); else if (IDVal == ".setfp") - return parseDirectiveSetFP(DirectiveID.getLoc()); + parseDirectiveSetFP(DirectiveID.getLoc()); else if (IDVal == ".pad") - return parseDirectivePad(DirectiveID.getLoc()); + parseDirectivePad(DirectiveID.getLoc()); else if (IDVal == ".save") - return parseDirectiveRegSave(DirectiveID.getLoc(), false); + parseDirectiveRegSave(DirectiveID.getLoc(), false); else if (IDVal == ".vsave") - return parseDirectiveRegSave(DirectiveID.getLoc(), true); + parseDirectiveRegSave(DirectiveID.getLoc(), true); else if (IDVal == ".ltorg" || IDVal == ".pool") - return parseDirectiveLtorg(DirectiveID.getLoc()); + parseDirectiveLtorg(DirectiveID.getLoc()); else if (IDVal == ".even") - return parseDirectiveEven(DirectiveID.getLoc()); + parseDirectiveEven(DirectiveID.getLoc()); else if (IDVal == ".personalityindex") - return parseDirectivePersonalityIndex(DirectiveID.getLoc()); + parseDirectivePersonalityIndex(DirectiveID.getLoc()); else if (IDVal == ".unwind_raw") - return parseDirectiveUnwindRaw(DirectiveID.getLoc()); + parseDirectiveUnwindRaw(DirectiveID.getLoc()); else if (IDVal == ".movsp") - return parseDirectiveMovSP(DirectiveID.getLoc()); + parseDirectiveMovSP(DirectiveID.getLoc()); else if (IDVal == ".arch_extension") - return parseDirectiveArchExtension(DirectiveID.getLoc()); + parseDirectiveArchExtension(DirectiveID.getLoc()); else if (IDVal == ".align") - return parseDirectiveAlign(DirectiveID.getLoc()); + return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure. 
else if (IDVal == ".thumb_set") - return parseDirectiveThumbSet(DirectiveID.getLoc()); - - if (!IsMachO && !IsCOFF) { + parseDirectiveThumbSet(DirectiveID.getLoc()); + else if (!IsMachO && !IsCOFF) { if (IDVal == ".arch") - return parseDirectiveArch(DirectiveID.getLoc()); + parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".cpu") - return parseDirectiveCPU(DirectiveID.getLoc()); + parseDirectiveCPU(DirectiveID.getLoc()); else if (IDVal == ".eabi_attribute") - return parseDirectiveEabiAttr(DirectiveID.getLoc()); + parseDirectiveEabiAttr(DirectiveID.getLoc()); else if (IDVal == ".fpu") - return parseDirectiveFPU(DirectiveID.getLoc()); + parseDirectiveFPU(DirectiveID.getLoc()); else if (IDVal == ".fnstart") - return parseDirectiveFnStart(DirectiveID.getLoc()); + parseDirectiveFnStart(DirectiveID.getLoc()); else if (IDVal == ".inst") - return parseDirectiveInst(DirectiveID.getLoc()); + parseDirectiveInst(DirectiveID.getLoc()); else if (IDVal == ".inst.n") - return parseDirectiveInst(DirectiveID.getLoc(), 'n'); + parseDirectiveInst(DirectiveID.getLoc(), 'n'); else if (IDVal == ".inst.w") - return parseDirectiveInst(DirectiveID.getLoc(), 'w'); + parseDirectiveInst(DirectiveID.getLoc(), 'w'); else if (IDVal == ".object_arch") - return parseDirectiveObjectArch(DirectiveID.getLoc()); + parseDirectiveObjectArch(DirectiveID.getLoc()); else if (IDVal == ".tlsdescseq") - return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); - } - - return true; + parseDirectiveTLSDescSeq(DirectiveID.getLoc()); + else + return true; + } else + return true; + return false; } /// parseLiteralValues @@ -9037,47 +9317,22 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { /// ::= .short expression [, expression]* /// ::= .word expression [, expression]* bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { - MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) { - Parser.eatToEndOfStatement(); - return false; - } - - getParser().getStreamer().EmitValue(Value, Size, L); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. 
- if (getLexer().isNot(AsmToken::Comma)) { - Error(L, "unexpected token in directive"); - return false; - } - Parser.Lex(); - } - } - - Parser.Lex(); - return false; + auto parseOne = [&]() -> bool { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + getParser().getStreamer().EmitValue(Value, Size, L); + return false; + }; + return (parseMany(parseOne)); } /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { - MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(L, "unexpected token in directive"); - return false; - } - Parser.Lex(); - - if (!hasThumb()) { - Error(L, "target does not support Thumb mode"); - return false; - } + if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") || + check(!hasThumb(), L, "target does not support Thumb mode")) + return true; if (!isThumb()) SwitchMode(); @@ -9089,26 +9344,20 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { - MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(L, "unexpected token in directive"); - return false; - } - Parser.Lex(); - - if (!hasARM()) { - Error(L, "target does not support ARM mode"); - return false; - } + if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") || + check(!hasARM(), L, "target does not support ARM mode")) + return true; if (isThumb()) SwitchMode(); - getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { + // We need to flush the current implicit IT block on a label, because it is + // not legal to branch into an IT block. + flushPendingInstructions(getStreamer()); if (NextSymbolIsThumb) { getParser().getStreamer().EmitThumbFunc(Symbol); NextSymbolIsThumb = false; @@ -9124,27 +9373,24 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { // Darwin asm has (optionally) function name after .thumb_func directive // ELF doesn't - if (IsMachO) { - const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::EndOfStatement)) { - if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) { - Error(L, "unexpected token in .thumb_func directive"); - return false; - } - MCSymbol *Func = - getParser().getContext().getOrCreateSymbol(Tok.getIdentifier()); + if (IsMachO) { + if (Parser.getTok().is(AsmToken::Identifier) || + Parser.getTok().is(AsmToken::String)) { + MCSymbol *Func = getParser().getContext().getOrCreateSymbol( + Parser.getTok().getIdentifier()); getParser().getStreamer().EmitThumbFunc(Func); - Parser.Lex(); // Consume the identifier token.
+ Parser.Lex(); + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.thumb_func' directive")) + return true; return false; } } - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(Parser.getTok().getLoc(), "unexpected token in directive"); - Parser.eatToEndOfStatement(); - return false; - } + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.thumb_func' directive")) + return true; NextSymbolIsThumb = true; return false; @@ -9161,21 +9407,13 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { } StringRef Mode = Tok.getString(); - if (Mode == "unified" || Mode == "UNIFIED") { - Parser.Lex(); - } else if (Mode == "divided" || Mode == "DIVIDED") { - Error(L, "'.syntax divided' arm asssembly not supported"); - return false; - } else { - Error(L, "unrecognized syntax mode in .syntax directive"); - return false; - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(Parser.getTok().getLoc(), "unexpected token in directive"); - return false; - } Parser.Lex(); + if (check(Mode == "divided" || Mode == "DIVIDED", L, + "'.syntax divided' arm assembly not supported") || + check(Mode != "unified" && Mode != "UNIFIED", L, + "unrecognized syntax mode in .syntax directive") || + parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + return true; // TODO tell the MC streamer the mode // getParser().getStreamer().Emit???(); @@ -9187,10 +9425,8 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { bool ARMAsmParser::parseDirectiveCode(SMLoc L) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Integer)) { - Error(L, "unexpected token in .code directive"); - return false; - } + if (Tok.isNot(AsmToken::Integer)) + return Error(L, "unexpected token in .code directive"); int64_t Val = Parser.getTok().getIntVal(); if (Val != 16 && Val != 32) { Error(L, "invalid operand to .code directive"); @@ -9198,26 +9434,19 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { } Parser.Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(Parser.getTok().getLoc(), "unexpected token in directive"); - return false; - } - Parser.Lex(); + if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + return true; if (Val == 16) { - if (!hasThumb()) { - Error(L, "target does not support Thumb mode"); - return false; - } + if (!hasThumb()) + return Error(L, "target does not support Thumb mode"); if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); } else { - if (!hasARM()) { - Error(L, "target does not support ARM mode"); - return false; - } + if (!hasARM()) + return Error(L, "target does not support ARM mode"); if (isThumb()) SwitchMode(); @@ -9234,25 +9463,15 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Eat the '.req' token. unsigned Reg; SMLoc SRegLoc, ERegLoc; - if (ParseRegister(Reg, SRegLoc, ERegLoc)) { - Parser.eatToEndOfStatement(); - Error(SRegLoc, "register name expected"); - return false; - } - - // Shouldn't be anything else. 
- if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { - Parser.eatToEndOfStatement(); - Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); - return false; - } - - Parser.Lex(); // Consume the EndOfStatement + if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc, + "register name expected") || + parseToken(AsmToken::EndOfStatement, + "unexpected input in .req directive.")) + return true; - if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) { - Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); - return false; - } + if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) + return Error(SRegLoc, + "redefinition of '" + Name + "' does not match original."); return false; } @@ -9261,13 +9480,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { MCAsmParser &Parser = getParser(); - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Parser.eatToEndOfStatement(); - Error(L, "unexpected input in .unreq directive."); - return false; - } + if (Parser.getTok().isNot(AsmToken::Identifier)) + return Error(L, "unexpected input in .unreq directive."); RegisterReqs.erase(Parser.getTok().getIdentifier().lower()); Parser.Lex(); // Eat the identifier. + if (parseToken(AsmToken::EndOfStatement, + "unexpected input in '.unreq' directive")) + return true; return false; } @@ -9300,13 +9519,10 @@ void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) { /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { StringRef Arch = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARM::parseArch(Arch); - if (ID == ARM::AK_INVALID) { - Error(L, "Unknown arch name"); - return false; - } + if (ID == ARM::AK_INVALID) + return Error(L, "Unknown arch name"); bool WasThumb = isThumb(); Triple T; @@ -9332,7 +9548,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { Tag = ARMBuildAttrs::AttrTypeFromString(Name); if (Tag == -1) { Error(TagLoc, "attribute name not recognised: " + Name); - Parser.eatToEndOfStatement(); return false; } Parser.Lex(); @@ -9340,27 +9555,18 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { const MCExpr *AttrExpr; TagLoc = Parser.getTok().getLoc(); - if (Parser.parseExpression(AttrExpr)) { - Parser.eatToEndOfStatement(); - return false; - } + if (Parser.parseExpression(AttrExpr)) + return true; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr); - if (!CE) { - Error(TagLoc, "expected numeric constant"); - Parser.eatToEndOfStatement(); - return false; - } + if (check(!CE, TagLoc, "expected numeric constant")) + return true; Tag = CE->getValue(); } - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "comma expected"); - Parser.eatToEndOfStatement(); - return false; - } - Parser.Lex(); // skip comma + if (Parser.parseToken(AsmToken::Comma, "comma expected")) + return true; StringRef StringValue = ""; bool IsStringValue = false; @@ -9383,44 +9589,32 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { if (IsIntegerValue) { const MCExpr *ValueExpr; SMLoc ValueExprLoc = Parser.getTok().getLoc(); - if (Parser.parseExpression(ValueExpr)) { - Parser.eatToEndOfStatement(); - return false; - } + if (Parser.parseExpression(ValueExpr)) + return true; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr); - if (!CE) { - Error(ValueExprLoc, "expected numeric constant"); - Parser.eatToEndOfStatement(); - return false; - } - + if (!CE) 
+ return Error(ValueExprLoc, "expected numeric constant"); IntegerValue = CE->getValue(); } if (Tag == ARMBuildAttrs::compatibility) { - if (Parser.getTok().isNot(AsmToken::Comma)) - IsStringValue = false; - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "comma expected"); - Parser.eatToEndOfStatement(); - return false; - } else { - Parser.Lex(); - } + if (Parser.parseToken(AsmToken::Comma, "comma expected")) + return true; } if (IsStringValue) { - if (Parser.getTok().isNot(AsmToken::String)) { - Error(Parser.getTok().getLoc(), "bad string constant"); - Parser.eatToEndOfStatement(); - return false; - } + if (Parser.getTok().isNot(AsmToken::String)) + return Error(Parser.getTok().getLoc(), "bad string constant"); StringValue = Parser.getTok().getStringContents(); Parser.Lex(); } + if (Parser.parseToken(AsmToken::EndOfStatement, + "unexpected token in '.eabi_attribute' directive")) + return true; + if (IsIntegerValue && IsStringValue) { assert(Tag == ARMBuildAttrs::compatibility); getTargetStreamer().emitIntTextAttribute(Tag, IntegerValue, StringValue); @@ -9439,10 +9633,8 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { // FIXME: This is using table-gen data, but should be moved to // ARMTargetParser once that is table-gen'd. - if (!getSTI().isCPUStringValid(CPU)) { - Error(L, "Unknown CPU name"); - return false; - } + if (!getSTI().isCPUStringValid(CPU)) + return Error(L, "Unknown CPU name"); bool WasThumb = isThumb(); MCSubtargetInfo &STI = copySTI(); @@ -9459,11 +9651,9 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = ARM::parseFPU(FPU); - std::vector<const char *> Features; - if (!ARM::getFPUFeatures(ID, Features)) { - Error(FPUNameLoc, "Unknown FPU name"); - return false; - } + std::vector<StringRef> Features; + if (!ARM::getFPUFeatures(ID, Features)) + return Error(FPUNameLoc, "Unknown FPU name"); MCSubtargetInfo &STI = copySTI(); for (auto Feature : Features) @@ -9477,10 +9667,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { /// parseDirectiveFnStart /// ::= .fnstart bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.fnstart' directive")) + return true; + if (UC.hasFnStart()) { Error(L, ".fnstart starts before the end of previous one"); UC.emitFnStartLocNotes(); - return false; + return true; } // Reset the unwind directives parser state @@ -9495,11 +9689,12 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { /// parseDirectiveFnEnd /// ::= .fnend bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.fnend' directive")) + return true; // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .fnend directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .fnend directive"); // Reset the unwind directives parser state getTargetStreamer().emitFnEnd(); @@ -9511,22 +9706,24 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { /// parseDirectiveCantUnwind /// ::= .cantunwind bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { - UC.recordCantUnwind(L); + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.cantunwind' directive")) + return true; + UC.recordCantUnwind(L); // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .cantunwind directive"); - return false; - } + if 
(check(!UC.hasFnStart(), L, ".fnstart must precede .cantunwind directive")) + return true; + if (UC.hasHandlerData()) { Error(L, ".cantunwind can't be used with .handlerdata directive"); UC.emitHandlerDataLocNotes(); - return false; + return true; } if (UC.hasPersonality()) { Error(L, ".cantunwind can't be used with .personality directive"); UC.emitPersonalityLocNotes(); - return false; + return true; } getTargetStreamer().emitCantUnwind(); @@ -9539,38 +9736,36 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); + // Parse the name of the personality routine + if (Parser.getTok().isNot(AsmToken::Identifier)) + return Error(L, "unexpected input in .personality directive."); + StringRef Name(Parser.getTok().getIdentifier()); + Parser.Lex(); + + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.personality' directive")) + return true; + UC.recordPersonality(L); // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .personality directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .personality directive"); if (UC.cantUnwind()) { Error(L, ".personality can't be used with .cantunwind directive"); UC.emitCantUnwindLocNotes(); - return false; + return true; } if (UC.hasHandlerData()) { Error(L, ".personality must precede .handlerdata directive"); UC.emitHandlerDataLocNotes(); - return false; + return true; } if (HasExistingPersonality) { - Parser.eatToEndOfStatement(); Error(L, "multiple personality directives"); UC.emitPersonalityLocNotes(); - return false; - } - - // Parse the name of the personality routine - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Parser.eatToEndOfStatement(); - Error(L, "unexpected input in .personality directive."); - return false; + return true; } - StringRef Name(Parser.getTok().getIdentifier()); - Parser.Lex(); MCSymbol *PR = getParser().getContext().getOrCreateSymbol(Name); getTargetStreamer().emitPersonality(PR); @@ -9580,17 +9775,18 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { /// parseDirectiveHandlerData /// ::= .handlerdata bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { - UC.recordHandlerData(L); + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.handlerdata' directive")) + return true; + UC.recordHandlerData(L); // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .personality directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .personality directive"); if (UC.cantUnwind()) { Error(L, ".handlerdata can't be used with .cantunwind directive"); UC.emitCantUnwindLocNotes(); - return false; + return true; } getTargetStreamer().emitHandlerData(); @@ -9602,74 +9798,52 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .setfp directive"); - return false; - } - if (UC.hasHandlerData()) { - Error(L, ".setfp must precede .handlerdata directive"); - return false; - } + if (check(!UC.hasFnStart(), L, ".fnstart must precede .setfp directive") || + check(UC.hasHandlerData(), L, + ".setfp must precede .handlerdata directive")) + return true; // Parse fpreg SMLoc FPRegLoc = Parser.getTok().getLoc(); int FPReg = tryParseRegister(); - if (FPReg == 
-1) { - Error(FPRegLoc, "frame pointer register expected"); - return false; - } - // Consume comma - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "comma expected"); - return false; - } - Parser.Lex(); // skip comma + if (check(FPReg == -1, FPRegLoc, "frame pointer register expected") || + Parser.parseToken(AsmToken::Comma, "comma expected")) + return true; // Parse spreg SMLoc SPRegLoc = Parser.getTok().getLoc(); int SPReg = tryParseRegister(); - if (SPReg == -1) { - Error(SPRegLoc, "stack pointer register expected"); - return false; - } - - if (SPReg != ARM::SP && SPReg != UC.getFPReg()) { - Error(SPRegLoc, "register should be either $sp or the latest fp register"); - return false; - } + if (check(SPReg == -1, SPRegLoc, "stack pointer register expected") || + check(SPReg != ARM::SP && SPReg != UC.getFPReg(), SPRegLoc, + "register should be either $sp or the latest fp register")) + return true; // Update the frame pointer register UC.saveFPReg(FPReg); // Parse offset int64_t Offset = 0; - if (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // skip comma - + if (Parser.parseOptionalToken(AsmToken::Comma)) { if (Parser.getTok().isNot(AsmToken::Hash) && - Parser.getTok().isNot(AsmToken::Dollar)) { - Error(Parser.getTok().getLoc(), "'#' expected"); - return false; - } + Parser.getTok().isNot(AsmToken::Dollar)) + return Error(Parser.getTok().getLoc(), "'#' expected"); Parser.Lex(); // skip hash token. const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) { - Error(ExLoc, "malformed setfp offset"); - return false; - } + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed setfp offset"); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) { - Error(ExLoc, "setfp offset must be an immediate"); - return false; - } - + if (check(!CE, ExLoc, "setfp offset must be an immediate")) + return true; Offset = CE->getValue(); } + if (Parser.parseToken(AsmToken::EndOfStatement)) + return true; + getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg), static_cast<unsigned>(SPReg), Offset); return false; @@ -9680,35 +9854,29 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { bool ARMAsmParser::parseDirectivePad(SMLoc L) { MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .pad directive"); - return false; - } - if (UC.hasHandlerData()) { - Error(L, ".pad must precede .handlerdata directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .pad directive"); + if (UC.hasHandlerData()) + return Error(L, ".pad must precede .handlerdata directive"); // Parse the offset if (Parser.getTok().isNot(AsmToken::Hash) && - Parser.getTok().isNot(AsmToken::Dollar)) { - Error(Parser.getTok().getLoc(), "'#' expected"); - return false; - } + Parser.getTok().isNot(AsmToken::Dollar)) + return Error(Parser.getTok().getLoc(), "'#' expected"); Parser.Lex(); // skip hash token. 
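[Editor's note] The .setfp handler above also switches its optional ", #offset" operand to parseOptionalToken, consuming the comma only if it is present. A toy version of that optional-operand shape, assuming a pre-lexed token list rather than LLVM's lexer:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Parse an optional ", #imm" at Pos; true on error. The comma is consumed
// only if present, mirroring parseOptionalToken(AsmToken::Comma).
static bool parseOptionalOffset(const std::vector<std::string> &T, size_t &Pos,
                                int64_t &Offset) {
  Offset = 0;
  if (Pos == T.size() || T[Pos] != ",")
    return false;               // no offset operand: still a valid directive
  ++Pos;
  if (Pos == T.size() || T[Pos] != "#") {
    std::cerr << "error: '#' expected\n";
    return true;
  }
  ++Pos;
  Offset = std::stoll(T[Pos++]);
  return false;
}

int main() {
  std::vector<std::string> T{",", "#", "-8"};
  size_t Pos = 0;
  int64_t Off;
  if (!parseOptionalOffset(T, Pos, Off))
    std::cout << "offset = " << Off << "\n";   // offset = -8
}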
const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) { - Error(ExLoc, "malformed pad offset"); - return false; - } + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed pad offset"); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) { - Error(ExLoc, "pad offset must be an immediate"); - return false; - } + if (!CE) + return Error(ExLoc, "pad offset must be an immediate"); + + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.pad' directive")) + return true; getTargetStreamer().emitPad(CE->getValue()); return false; @@ -9719,30 +9887,23 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) { /// ::= .vsave { registers } bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { // Check the ordering of unwind directives - if (!UC.hasFnStart()) { - Error(L, ".fnstart must precede .save or .vsave directives"); - return false; - } - if (UC.hasHandlerData()) { - Error(L, ".save or .vsave must precede .handlerdata directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .save or .vsave directives"); + if (UC.hasHandlerData()) + return Error(L, ".save or .vsave must precede .handlerdata directive"); // RAII object to make sure parsed operands are deleted. SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; // Parse the register list - if (parseRegisterList(Operands)) - return false; + if (parseRegisterList(Operands) || + parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + return true; ARMOperand &Op = (ARMOperand &)*Operands[0]; - if (!IsVector && !Op.isRegList()) { - Error(L, ".save expects GPR registers"); - return false; - } - if (IsVector && !Op.isDPRRegList()) { - Error(L, ".vsave expects DPR registers"); - return false; - } + if (!IsVector && !Op.isRegList()) + return Error(L, ".save expects GPR registers"); + if (IsVector && !Op.isDPRRegList()) + return Error(L, ".vsave expects DPR registers"); getTargetStreamer().emitRegSave(Op.getRegList(), IsVector); return false; @@ -9753,8 +9914,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] 
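[Editor's note] For .save/.vsave the rewrite keeps the GPR-versus-DPR register-list checks but routes them through early returns. A deliberately simplified sketch of that classification; the real code asks ARMOperand::isRegList()/isDPRRegList(), not name prefixes, and registers like lr would need special-casing here:

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

static bool allHavePrefix(const std::vector<std::string> &Regs, char P) {
  for (const auto &R : Regs)
    if (R.empty() || std::tolower(static_cast<unsigned char>(R[0])) != P)
      return false;
  return true;
}

static bool checkSaveList(const std::vector<std::string> &Regs, bool IsVector) {
  if (!IsVector && !allHavePrefix(Regs, 'r')) {
    std::cerr << "error: .save expects GPR registers\n";
    return true;
  }
  if (IsVector && !allHavePrefix(Regs, 'd')) {
    std::cerr << "error: .vsave expects DPR registers\n";
    return true;
  }
  return false;
}

int main() {
  std::cout << checkSaveList({"r4", "r5", "r6"}, false)    // 0: ok for .save
            << checkSaveList({"d8", "d9"}, true)           // 0: ok for .vsave
            << checkSaveList({"r4", "d0"}, false) << "\n"; // 1: mixed list
}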
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { - MCAsmParser &Parser = getParser(); - int Width; + int Width = 4; if (isThumb()) { switch (Suffix) { @@ -9762,96 +9922,68 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { Width = 2; break; case 'w': - Width = 4; break; default: - Parser.eatToEndOfStatement(); - Error(Loc, "cannot determine Thumb instruction size, " - "use inst.n/inst.w instead"); - return false; + return Error(Loc, "cannot determine Thumb instruction size, " + "use inst.n/inst.w instead"); } } else { - if (Suffix) { - Parser.eatToEndOfStatement(); - Error(Loc, "width suffixes are invalid in ARM mode"); - return false; - } - Width = 4; - } - - if (getLexer().is(AsmToken::EndOfStatement)) { - Parser.eatToEndOfStatement(); - Error(Loc, "expected expression following directive"); - return false; + if (Suffix) + return Error(Loc, "width suffixes are invalid in ARM mode"); } - for (;;) { + auto parseOne = [&]() -> bool { const MCExpr *Expr; - - if (getParser().parseExpression(Expr)) { - Error(Loc, "expected expression"); - return false; - } - + if (getParser().parseExpression(Expr)) + return true; const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); if (!Value) { - Error(Loc, "expected constant expression"); - return false; + return Error(Loc, "expected constant expression"); } switch (Width) { case 2: - if (Value->getValue() > 0xffff) { - Error(Loc, "inst.n operand is too big, use inst.w instead"); - return false; - } + if (Value->getValue() > 0xffff) + return Error(Loc, "inst.n operand is too big, use inst.w instead"); break; case 4: - if (Value->getValue() > 0xffffffff) { - Error(Loc, - StringRef(Suffix ? "inst.w" : "inst") + " operand is too big"); - return false; - } + if (Value->getValue() > 0xffffffff) + return Error(Loc, StringRef(Suffix ? 
"inst.w" : "inst") + + " operand is too big"); break; default: llvm_unreachable("only supported widths are 2 and 4"); } getTargetStreamer().emitInst(Value->getValue(), Suffix); + return false; + }; - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - if (getLexer().isNot(AsmToken::Comma)) { - Error(Loc, "unexpected token in directive"); - return false; - } - - Parser.Lex(); - } - - Parser.Lex(); + if (parseOptionalToken(AsmToken::EndOfStatement)) + return Error(Loc, "expected expression following directive"); + if (parseMany(parseOne)) + return true; return false; } /// parseDirectiveLtorg /// ::= .ltorg | .pool bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) { + if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + return true; getTargetStreamer().emitCurrentConstantPool(); return false; } bool ARMAsmParser::parseDirectiveEven(SMLoc L) { - const MCSection *Section = getStreamer().getCurrentSection().first; + const MCSection *Section = getStreamer().getCurrentSectionOnly(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - TokError("unexpected token in directive"); - return false; - } + if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) + return true; if (!Section) { getStreamer().InitSections(false); - Section = getStreamer().getCurrentSection().first; + Section = getStreamer().getCurrentSectionOnly(); } assert(Section && "must have section to emit alignment"); @@ -9869,51 +10001,41 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); + const MCExpr *IndexExpression; + SMLoc IndexLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(IndexExpression) || + parseToken(AsmToken::EndOfStatement, + "unexpected token in '.personalityindex' directive")) { + return true; + } + UC.recordPersonalityIndex(L); if (!UC.hasFnStart()) { - Parser.eatToEndOfStatement(); - Error(L, ".fnstart must precede .personalityindex directive"); - return false; + return Error(L, ".fnstart must precede .personalityindex directive"); } if (UC.cantUnwind()) { - Parser.eatToEndOfStatement(); Error(L, ".personalityindex cannot be used with .cantunwind"); UC.emitCantUnwindLocNotes(); - return false; + return true; } if (UC.hasHandlerData()) { - Parser.eatToEndOfStatement(); Error(L, ".personalityindex must precede .handlerdata directive"); UC.emitHandlerDataLocNotes(); - return false; + return true; } if (HasExistingPersonality) { - Parser.eatToEndOfStatement(); Error(L, "multiple personality directives"); UC.emitPersonalityLocNotes(); - return false; - } - - const MCExpr *IndexExpression; - SMLoc IndexLoc = Parser.getTok().getLoc(); - if (Parser.parseExpression(IndexExpression)) { - Parser.eatToEndOfStatement(); - return false; + return true; } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IndexExpression); - if (!CE) { - Parser.eatToEndOfStatement(); - Error(IndexLoc, "index must be a constant number"); - return false; - } - if (CE->getValue() < 0 || - CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) { - Parser.eatToEndOfStatement(); - Error(IndexLoc, "personality routine index should be in range [0-3]"); - return false; - } + if (!CE) + return Error(IndexLoc, "index must be a constant number"); + if (CE->getValue() < 0 || CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) + return Error(IndexLoc, + "personality routine index should be in range [0-3]"); getTargetStreamer().emitPersonalityIndex(CE->getValue()); return false; @@ -9923,81 +10045,51 @@ 
bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { /// ::= .unwind_raw offset, opcode [, opcode...] bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { MCAsmParser &Parser = getParser(); - if (!UC.hasFnStart()) { - Parser.eatToEndOfStatement(); - Error(L, ".fnstart must precede .unwind_raw directives"); - return false; - } - int64_t StackOffset; - const MCExpr *OffsetExpr; SMLoc OffsetLoc = getLexer().getLoc(); - if (getLexer().is(AsmToken::EndOfStatement) || - getParser().parseExpression(OffsetExpr)) { - Error(OffsetLoc, "expected expression"); - Parser.eatToEndOfStatement(); - return false; - } + + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .unwind_raw directives"); + if (getParser().parseExpression(OffsetExpr)) + return Error(OffsetLoc, "expected expression"); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) { - Error(OffsetLoc, "offset must be a constant"); - Parser.eatToEndOfStatement(); - return false; - } + if (!CE) + return Error(OffsetLoc, "offset must be a constant"); StackOffset = CE->getValue(); - if (getLexer().isNot(AsmToken::Comma)) { - Error(getLexer().getLoc(), "expected comma"); - Parser.eatToEndOfStatement(); - return false; - } - Parser.Lex(); + if (Parser.parseToken(AsmToken::Comma, "expected comma")) + return true; SmallVector<uint8_t, 16> Opcodes; - for (;;) { - const MCExpr *OE; + auto parseOne = [&]() -> bool { + const MCExpr *OE; SMLoc OpcodeLoc = getLexer().getLoc(); - if (getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE)) { - Error(OpcodeLoc, "expected opcode expression"); - Parser.eatToEndOfStatement(); - return false; - } - + if (check(getLexer().is(AsmToken::EndOfStatement) || + Parser.parseExpression(OE), + OpcodeLoc, "expected opcode expression")) + return true; const MCConstantExpr *OC = dyn_cast<MCConstantExpr>(OE); - if (!OC) { - Error(OpcodeLoc, "opcode value must be a constant"); - Parser.eatToEndOfStatement(); - return false; - } - + if (!OC) + return Error(OpcodeLoc, "opcode value must be a constant"); const int64_t Opcode = OC->getValue(); - if (Opcode & ~0xff) { - Error(OpcodeLoc, "invalid opcode"); - Parser.eatToEndOfStatement(); - return false; - } - + if (Opcode & ~0xff) + return Error(OpcodeLoc, "invalid opcode"); Opcodes.push_back(uint8_t(Opcode)); + return false; + }; - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - if (getLexer().isNot(AsmToken::Comma)) { - Error(getLexer().getLoc(), "unexpected token in directive"); - Parser.eatToEndOfStatement(); - return false; - } - - Parser.Lex(); - } + // Must have at least 1 element + SMLoc OpcodeLoc = getLexer().getLoc(); + if (parseOptionalToken(AsmToken::EndOfStatement)) + return Error(OpcodeLoc, "expected opcode expression"); + if (parseMany(parseOne)) + return true; getTargetStreamer().emitUnwindRaw(StackOffset, Opcodes); - - Parser.Lex(); return false; } @@ -10006,22 +10098,17 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::Identifier)) { - TokError("expected variable after '.tlsdescseq' directive"); - Parser.eatToEndOfStatement(); - return false; - } + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected variable after '.tlsdescseq' directive"); const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Parser.getTok().getIdentifier(), MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext()); Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) { - 
Error(Parser.getTok().getLoc(), "unexpected token"); - Parser.eatToEndOfStatement(); - return false; - } + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.tlsdescseq' directive")) + return true; getTargetStreamer().AnnotateTLSDescriptorSequence(SRE); return false; @@ -10031,60 +10118,40 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { /// ::= .movsp reg [, #offset] bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { MCAsmParser &Parser = getParser(); - if (!UC.hasFnStart()) { - Parser.eatToEndOfStatement(); - Error(L, ".fnstart must precede .movsp directives"); - return false; - } - if (UC.getFPReg() != ARM::SP) { - Parser.eatToEndOfStatement(); - Error(L, "unexpected .movsp directive"); - return false; - } + if (!UC.hasFnStart()) + return Error(L, ".fnstart must precede .movsp directives"); + if (UC.getFPReg() != ARM::SP) + return Error(L, "unexpected .movsp directive"); SMLoc SPRegLoc = Parser.getTok().getLoc(); int SPReg = tryParseRegister(); - if (SPReg == -1) { - Parser.eatToEndOfStatement(); - Error(SPRegLoc, "register expected"); - return false; - } - - if (SPReg == ARM::SP || SPReg == ARM::PC) { - Parser.eatToEndOfStatement(); - Error(SPRegLoc, "sp and pc are not permitted in .movsp directive"); - return false; - } + if (SPReg == -1) + return Error(SPRegLoc, "register expected"); + if (SPReg == ARM::SP || SPReg == ARM::PC) + return Error(SPRegLoc, "sp and pc are not permitted in .movsp directive"); int64_t Offset = 0; - if (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); - - if (Parser.getTok().isNot(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), "expected #constant"); - Parser.eatToEndOfStatement(); - return false; - } - Parser.Lex(); + if (Parser.parseOptionalToken(AsmToken::Comma)) { + if (Parser.parseToken(AsmToken::Hash, "expected #constant")) + return true; const MCExpr *OffsetExpr; SMLoc OffsetLoc = Parser.getTok().getLoc(); - if (Parser.parseExpression(OffsetExpr)) { - Parser.eatToEndOfStatement(); - Error(OffsetLoc, "malformed offset expression"); - return false; - } + + if (Parser.parseExpression(OffsetExpr)) + return Error(OffsetLoc, "malformed offset expression"); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) { - Parser.eatToEndOfStatement(); - Error(OffsetLoc, "offset must be an immediate constant"); - return false; - } + if (!CE) + return Error(OffsetLoc, "offset must be an immediate constant"); Offset = CE->getValue(); } + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.movsp' directive")) + return true; + getTargetStreamer().emitMovSP(SPReg, Offset); UC.saveFPReg(SPReg); @@ -10095,11 +10162,8 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::Identifier)) { - Error(getLexer().getLoc(), "unexpected token"); - Parser.eatToEndOfStatement(); - return false; - } + if (getLexer().isNot(AsmToken::Identifier)) + return Error(getLexer().getLoc(), "unexpected token"); StringRef Arch = Parser.getTok().getString(); SMLoc ArchLoc = Parser.getTok().getLoc(); @@ -10107,19 +10171,12 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { unsigned ID = ARM::parseArch(Arch); - if (ID == ARM::AK_INVALID) { - Error(ArchLoc, "unknown architecture '" + Arch + "'"); - Parser.eatToEndOfStatement(); - return false; - } + if (ID == ARM::AK_INVALID) + return Error(ArchLoc, "unknown architecture '" + Arch + "'"); + if (parseToken(AsmToken::EndOfStatement)) + 
return true; getTargetStreamer().emitObjectArch(ID); - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(getLexer().getLoc(), "unexpected token"); - Parser.eatToEndOfStatement(); - } - return false; } @@ -10128,18 +10185,17 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { // NOTE: if this is not the end of the statement, fall back to the target // agnostic handling for this directive which will correctly handle this. - if (getLexer().isNot(AsmToken::EndOfStatement)) - return true; - - // '.align' is target specifically handled to mean 2**2 byte alignment. - const MCSection *Section = getStreamer().getCurrentSection().first; - assert(Section && "must have section to emit alignment"); - if (Section->UseCodeAlign()) - getStreamer().EmitCodeAlignment(4, 0); - else - getStreamer().EmitValueToAlignment(4, 0, 1, 0); - - return false; + if (parseOptionalToken(AsmToken::EndOfStatement)) { + // '.align' is target specifically handled to mean 2**2 byte alignment. + const MCSection *Section = getStreamer().getCurrentSectionOnly(); + assert(Section && "must have section to emit alignment"); + if (Section->UseCodeAlign()) + getStreamer().EmitCodeAlignment(4, 0); + else + getStreamer().EmitValueToAlignment(4, 0, 1, 0); + return false; + } + return true; } /// parseDirectiveThumbSet @@ -10148,18 +10204,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { MCAsmParser &Parser = getParser(); StringRef Name; - if (Parser.parseIdentifier(Name)) { - TokError("expected identifier after '.thumb_set'"); - Parser.eatToEndOfStatement(); - return false; - } - - if (getLexer().isNot(AsmToken::Comma)) { - TokError("expected comma after name '" + Name + "'"); - Parser.eatToEndOfStatement(); - return false; - } - Lex(); + if (check(Parser.parseIdentifier(Name), + "expected identifier after '.thumb_set'") || + parseToken(AsmToken::Comma, "expected comma after name '" + Name + "'")) + return true; MCSymbol *Sym; const MCExpr *Value; @@ -10173,10 +10221,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { /// Force static initialization. 
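[Editor's note] The initialization code that follows (and the ARMTargetInfo.cpp hunk further down) replaces the global Target objects such as TheARMLETarget with accessor functions returning function-local statics, sidestepping cross-TU static initialization order. The pattern in isolation, with an illustrative Target struct:

#include <iostream>
#include <string>

struct Target { std::string Name; };

// Constructed on first use; thread-safe since C++11. Callers can never
// observe a half-initialized Target, unlike a namespace-scope global.
Target &getTheARMLETarget() {
  static Target T{"arm"};
  return T;
}

int main() {
  std::cout << getTheARMLETarget().Name << "\n";
  std::cout << (&getTheARMLETarget() == &getTheARMLETarget()) << "\n";  // 1
}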
extern "C" void LLVMInitializeARMAsmParser() { - RegisterMCAsmParser<ARMAsmParser> X(TheARMLETarget); - RegisterMCAsmParser<ARMAsmParser> Y(TheARMBETarget); - RegisterMCAsmParser<ARMAsmParser> A(TheThumbLETarget); - RegisterMCAsmParser<ARMAsmParser> B(TheThumbBETarget); + RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget()); + RegisterMCAsmParser<ARMAsmParser> Y(getTheARMBETarget()); + RegisterMCAsmParser<ARMAsmParser> A(getTheThumbLETarget()); + RegisterMCAsmParser<ARMAsmParser> B(getTheThumbBETarget()); } #define GET_REGISTER_MATCHER @@ -10218,16 +10266,17 @@ static const struct { bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { MCAsmParser &Parser = getParser(); - if (getLexer().isNot(AsmToken::Identifier)) { - Error(getLexer().getLoc(), "unexpected token"); - Parser.eatToEndOfStatement(); - return false; - } + if (getLexer().isNot(AsmToken::Identifier)) + return Error(getLexer().getLoc(), "expected architecture extension name"); StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); Lex(); + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.arch_extension' directive")) + return true; + bool EnableFeature = true; if (Name.startswith_lower("no")) { EnableFeature = false; @@ -10235,20 +10284,19 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { } unsigned FeatureKind = ARM::parseArchExt(Name); if (FeatureKind == ARM::AEK_INVALID) - Error(ExtLoc, "unknown architectural extension: " + Name); + return Error(ExtLoc, "unknown architectural extension: " + Name); for (const auto &Extension : Extensions) { if (Extension.Kind != FeatureKind) continue; if (Extension.Features.none()) - report_fatal_error("unsupported architectural extension: " + Name); + return Error(ExtLoc, "unsupported architectural extension: " + Name); - if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) { - Error(ExtLoc, "architectural extension '" + Name + "' is not " - "allowed for the current base architecture"); - return false; - } + if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) + return Error(ExtLoc, "architectural extension '" + Name + + "' is not " + "allowed for the current base architecture"); MCSubtargetInfo &STI = copySTI(); FeatureBitset ToggleFeatures = EnableFeature @@ -10261,9 +10309,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { return false; } - Error(ExtLoc, "unknown architectural extension: " + Name); - Parser.eatToEndOfStatement(); - return false; + return Error(ExtLoc, "unknown architectural extension: " + Name); } // Define this matcher function after the auto-generated include so we diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 3196a57ccc3e..ac3d8c780af2 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -861,13 +861,13 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, extern "C" void LLVMInitializeARMDisassembler() { - TargetRegistry::RegisterMCDisassembler(TheARMLETarget, + TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(), createARMDisassembler); - TargetRegistry::RegisterMCDisassembler(TheARMBETarget, + TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(), createARMDisassembler); - TargetRegistry::RegisterMCDisassembler(TheThumbLETarget, + TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(), createThumbDisassembler); - 
TargetRegistry::RegisterMCDisassembler(TheThumbBETarget, + TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(), createThumbDisassembler); } @@ -1432,7 +1432,7 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, case ARM::STC_POST: case ARM::STCL_POST: imm |= U << 8; - // fall through. + LLVM_FALLTHROUGH; default: // The 'option' variant doesn't encode 'U' in the immediate since // the immediate is unsigned [0,255]. @@ -2555,6 +2555,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, break; } // Fall through to handle the register offset variant. + LLVM_FALLTHROUGH; case ARM::VLD1d8wb_fixed: case ARM::VLD1d16wb_fixed: case ARM::VLD1d32wb_fixed: @@ -4157,7 +4158,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, case 0x93: // faultmask_ns if (!(FeatureBits[ARM::HasV8MMainlineOps])) return MCDisassembler::Fail; - // fall through + LLVM_FALLTHROUGH; case 10: // msplim case 11: // psplim case 0x88: // msp_ns @@ -5310,4 +5311,3 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, return S; } - diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e81bb77dbdfc..3667952d44c0 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -726,6 +726,12 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + assert(std::is_sorted(MI->begin() + OpNum, MI->end(), + [&](const MCOperand &LHS, const MCOperand &RHS) { + return MRI.getEncodingValue(LHS.getReg()) < + MRI.getEncodingValue(RHS.getReg()); + })); + O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { if (i != OpNum) diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 0fc758201d47..a58d5b34131b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -375,7 +375,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; @@ -387,7 +387,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_t2_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -403,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. Value -= 4; - // FALLTHROUGH + LLVM_FALLTHROUGH; case ARM::fixup_t2_ldst_pcrel_12: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value -= 4; @@ -541,7 +541,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. 
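[Editor's note] Throughout these files the "// fall through" comments become LLVM_FALLTHROUGH, so -Wimplicit-fallthrough can verify the fallthrough is intentional. A standalone C++17 sketch using the standard [[fallthrough]] attribute the macro expands to; the fixup kinds and value are made up for illustration:

#include <iostream>

unsigned classify(int Kind, bool IsPCRel) {
  unsigned Value = 0xABCD1234;
  switch (Kind) {
  case 0:                 // a movt-style high-half fixup (hypothetical kind)
    if (!IsPCRel)
      Value >>= 16;
    [[fallthrough]];      // deliberate: share the low-half masking below
  case 1:                 // movw-style low-half fixup
    return Value & 0xFFFF;
  default:
    return 0;
  }
}

int main() { std::cout << std::hex << classify(0, false) << "\n"; }  // abcd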
- if (Ctx && Value % 4 != 0) { + if (Ctx && Value % 4 != 0) { Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination"); return 0; } @@ -578,6 +578,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Offset by 4, and don't encode the low two bits. return ((Value - 4) >> 2) & 0xff; case ARM::fixup_arm_thumb_cb: { + // CB instructions can only branch to offsets in [4, 126] in multiples of 2 + // so ensure that the raw value LSB is zero and it lies in [2, 130]. + // An offset of 2 will be relaxed to a NOP. + if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } // Offset by 4 and don't encode the lower bit, which is always 0. // FIXME: diagnose if no Thumb2 uint32_t Binary = (Value - 4) >> 1; @@ -623,7 +630,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. - // Fall through. + LLVM_FALLTHROUGH; case ARM::fixup_t2_pcrel_10: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value = Value - 4; @@ -650,7 +657,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_pcrel_9: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. - // Fall through. + LLVM_FALLTHROUGH; case ARM::fixup_t2_pcrel_9: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value = Value - 4; @@ -696,14 +703,16 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; - // Some fixups to thumb function symbols need the low bit (thumb bit) - // twiddled. - if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && - (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { + // MachO (the only user of "Value") tries to make .o files that look vaguely + // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit + // into the addend if possible. Other relocation types don't want this bit + // though (branches couldn't encode it if it *was* present, and no other + // relocations exist) and it can interfere with checking valid expressions. 
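[Editor's note] The new fixup_arm_thumb_cb check above rejects raw values that are odd or outside [2, 0x82]: CB{N}Z reaches offsets 4..126 in steps of 2 after the pipeline's -4 adjustment, and an offset of 2 is later relaxed to a NOP. A worked sketch; only the range test and the (Value - 4) >> 1 step appear in the hunk, so the field packing below is our reading of the i:imm5 layout, not text from the patch:

#include <cstdint>
#include <iostream>

// Range test as in the hunk: even, and within [2, 0x82].
static bool cbFixupInRange(int64_t Value) {
  return Value >= 2 && Value <= 0x82 && (Value & 1) == 0;
}

// Illustrative encoding step: offset by 4, drop the always-zero low bit,
// then pack into the i:imm5 fields of the CB{N}Z encoding.
static uint32_t encodeCB(uint32_t Value) {
  uint32_t Binary = (Value - 4) >> 1;
  return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
}

int main() {
  for (int64_t V : {2, 5, 130, 132})
    std::cout << V << ": " << (cbFixupInRange(V) ? "ok" : "out of range") << "\n";
  std::cout << std::hex << encodeCB(130) << "\n";   // 2f8
}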
+ if ((unsigned)Fixup.getKind() == FK_Data_4 || + (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || + (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || + (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || + (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { if (Sym) { if (Asm.isThumbFunc(Sym)) Value |= 1; @@ -1111,6 +1120,7 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) { MCAsmBackend *llvm::createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, StringRef CPU, + const MCTargetOptions &Options, bool isLittle) { switch (TheTriple.getObjectFormat()) { default: @@ -1131,24 +1141,28 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU) { - return createARMAsmBackend(T, MRI, TT, CPU, true); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + return createARMAsmBackend(T, MRI, TT, CPU, Options, true); } MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU) { - return createARMAsmBackend(T, MRI, TT, CPU, false); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + return createARMAsmBackend(T, MRI, TT, CPU, Options, false); } MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU) { - return createARMAsmBackend(T, MRI, TT, CPU, true); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + return createARMAsmBackend(T, MRI, TT, CPU, Options, true); } MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU) { - return createARMAsmBackend(T, MRI, TT, CPU, false); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + return createARMAsmBackend(T, MRI, TT, CPU, Options, false); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 4118fe8e8cdb..6f19754b899e 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -140,6 +140,12 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_t2_movw_lo16: Type = ELF::R_ARM_THM_MOVW_PREL_NC; break; + case ARM::fixup_arm_thumb_br: + Type = ELF::R_ARM_THM_JUMP11; + break; + case ARM::fixup_arm_thumb_bcc: + Type = ELF::R_ARM_THM_JUMP8; + break; case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: switch (Modifier) { @@ -221,6 +227,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_TLSDESC: Type = ELF::R_ARM_TLS_GOTDESC; break; + case MCSymbolRefExpr::VK_TLSLDM: + Type = ELF::R_ARM_TLS_LDM32; + break; case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ: Type = ELF::R_ARM_TLS_DESCSEQ; break; @@ -239,10 +248,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_arm_movt_hi16: - Type = ELF::R_ARM_MOVT_ABS; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_MOVT_ABS; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_MOVT_BREL; + break; + } break; case ARM::fixup_arm_movw_lo16: - Type = ELF::R_ARM_MOVW_ABS_NC; + switch (Modifier) { + default: 
llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_MOVW_ABS_NC; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_MOVW_BREL_NC; + break; + } break; case ARM::fixup_t2_movt_hi16: Type = ELF::R_ARM_THM_MOVT_ABS; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 36cb74765f3b..f6bb35d2326b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -591,7 +591,7 @@ private: void FlushPendingOffset(); void FlushUnwindOpcodes(bool NoHandlerData); - void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, + void SwitchToEHSection(StringRef Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); void SwitchToExTabSection(const MCSymbol &FnStart); void SwitchToExIdxSection(const MCSymbol &FnStart); @@ -1074,7 +1074,7 @@ void ARMELFStreamer::reset() { getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } -inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, +inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, unsigned Type, unsigned Flags, SectionKind Kind, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 53cd29a6061e..1e062ad45af5 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -90,6 +90,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { PrivateGlobalPrefix = "$M"; PrivateLabelPrefix = "$M"; + CommentString = ";"; } void ARMCOFFMCAsmInfoGNU::anchor() { } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 9fca13eeea93..559a4f8de75f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1493,7 +1493,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, case ARM_AM::lsl: SBits = 0x0; break; case ARM_AM::lsr: SBits = 0x2; break; case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::rrx: // FALLTHROUGH + case ARM_AM::rrx: LLVM_FALLTHROUGH; case ARM_AM::ror: SBits = 0x6; break; } @@ -1545,8 +1545,15 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, else Binary |= NumRegs * 2; } else { + const MCRegisterInfo &MRI = *CTX.getRegisterInfo(); + assert(std::is_sorted(MI.begin() + Op, MI.end(), + [&](const MCOperand &LHS, const MCOperand &RHS) { + return MRI.getEncodingValue(LHS.getReg()) < + MRI.getEncodingValue(RHS.getReg()); + })); + for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg()); + unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index afb089ab0286..9e4d202321e6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -204,7 +204,8 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, - T.getArch() == 
Triple::thumb); + (T.getArch() == Triple::thumb || + T.getArch() == Triple::thumbeb)); } static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, @@ -273,8 +274,8 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { - for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget, - &TheThumbBETarget}) { + for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(), + &getTheThumbLETarget(), &getTheThumbBETarget()}) { // Register the MC asm info. RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo); @@ -313,16 +314,18 @@ extern "C" void LLVMInitializeARMTargetMC() { } // Register the MC Code Emitter - for (Target *T : {&TheARMLETarget, &TheThumbLETarget}) + for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()}) TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter); - for (Target *T : {&TheARMBETarget, &TheThumbBETarget}) + for (Target *T : {&getTheARMBETarget(), &getTheThumbBETarget()}) TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter); // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheARMBETarget, createARMBEAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheThumbLETarget, + TargetRegistry::RegisterMCAsmBackend(getTheARMLETarget(), + createARMLEAsmBackend); + TargetRegistry::RegisterMCAsmBackend(getTheARMBETarget(), + createARMBEAsmBackend); + TargetRegistry::RegisterMCAsmBackend(getTheThumbLETarget(), createThumbLEAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget, + TargetRegistry::RegisterMCAsmBackend(getTheThumbBETarget(), createThumbBEAsmBackend); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index c2bbc8e828c4..ba834201e585 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -28,6 +28,7 @@ class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCStreamer; +class MCTargetOptions; class MCRelocationInfo; class MCTargetStreamer; class StringRef; @@ -36,8 +37,10 @@ class Triple; class raw_ostream; class raw_pwrite_stream; -extern Target TheARMLETarget, TheThumbLETarget; -extern Target TheARMBETarget, TheThumbBETarget; +Target &getTheARMLETarget(); +Target &getTheThumbLETarget(); +Target &getTheARMBETarget(); +Target &getTheThumbBETarget(); namespace ARM_MC { std::string ParseARMTriple(const Triple &TT, StringRef CPU); @@ -66,21 +69,26 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, + const MCTargetOptions &Options, bool IsLittleEndian); MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple 
&TT, StringRef CPU); + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); // Construct a PE/COFF machine code streamer which will generate a PE/COFF // object file. diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index cfa6ce7da65e..b77181f29b2d 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -208,7 +208,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; MovtBit = 1; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: ThumbBit = 1; break; diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp index 7f2124033982..744761bcddb8 100644 --- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -43,7 +43,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; - const char *getPassName() const override { + StringRef getPassName() const override { return "ARM MLA / MLS expansion pass"; } @@ -334,18 +334,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { unsigned Skip = 0; MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); while (MII != E) { - MachineInstr *MI = &*MII; + MachineInstr *MI = &*MII++; - if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) { - ++MII; + if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) continue; - } const MCInstrDesc &MCID = MI->getDesc(); if (MI->isBarrier()) { clearStack(); Skip = 0; - ++MII; continue; } @@ -365,13 +362,9 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { pushStack(MI); else { ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); - E = MBB.rend(); // May have changed if MI was the 1st instruction. 
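[Editor's note] The MLxExpansion hunk around this point advances the reverse iterator before the current instruction can be expanded away (MachineInstr *MI = &*MII++;), so the loop no longer re-derives rend() after a mutation. The closest standard-library analogue differs in mechanics, since std::list reverse iterators wrap a forward iterator, but it shows the same step-then-mutate discipline:

#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> L{1, 2, 3, 4, 5};
  for (auto It = L.rbegin(); It != L.rend();) {
    if (*It % 2 == 0)
      // "Expand" the even element: erase it, then rebuild the reverse
      // iterator from the forward iterator erase() hands back.
      It = std::make_reverse_iterator(L.erase(std::next(It).base()));
    else
      ++It;
  }
  for (int V : L)
    std::cout << V << ' ';   // 1 3 5
  std::cout << '\n';
}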
Changed = true; - continue; } } - - ++MII; } return Changed; diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 3f88eb818062..caa69f8d71b7 100644 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -11,17 +11,31 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -Target llvm::TheARMLETarget, llvm::TheARMBETarget; -Target llvm::TheThumbLETarget, llvm::TheThumbBETarget; +Target &llvm::getTheARMLETarget() { + static Target TheARMLETarget; + return TheARMLETarget; +} +Target &llvm::getTheARMBETarget() { + static Target TheARMBETarget; + return TheARMBETarget; +} +Target &llvm::getTheThumbLETarget() { + static Target TheThumbLETarget; + return TheThumbLETarget; +} +Target &llvm::getTheThumbBETarget() { + static Target TheThumbBETarget; + return TheThumbBETarget; +} extern "C" void LLVMInitializeARMTargetInfo() { - RegisterTarget<Triple::arm, /*HasJIT=*/true> - X(TheARMLETarget, "arm", "ARM"); - RegisterTarget<Triple::armeb, /*HasJIT=*/true> - Y(TheARMBETarget, "armeb", "ARM (big endian)"); + RegisterTarget<Triple::arm, /*HasJIT=*/true> X(getTheARMLETarget(), "arm", + "ARM"); + RegisterTarget<Triple::armeb, /*HasJIT=*/true> Y(getTheARMBETarget(), "armeb", + "ARM (big endian)"); - RegisterTarget<Triple::thumb, /*HasJIT=*/true> - A(TheThumbLETarget, "thumb", "Thumb"); - RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> - B(TheThumbBETarget, "thumbeb", "Thumb (big endian)"); + RegisterTarget<Triple::thumb, /*HasJIT=*/true> A(getTheThumbLETarget(), + "thumb", "Thumb"); + RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> B( + getTheThumbBETarget(), "thumbeb", "Thumb (big endian)"); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index c0732e4b750a..9953c61cd89c 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -26,8 +26,8 @@ Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) : ARMFrameLowering(sti) {} bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned CFSize = MFI.getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the // stack frame. ARM (especially Thumb) has small immediate offset to // address the stack frame. 
So a large call frame can cause poor codegen @@ -35,7 +35,7 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 return false; - return !MF.getFrameInfo()->hasVarSizedObjects(); + return !MFI.hasVarSizedObjects(); } static void emitSPUpdate(MachineBasicBlock &MBB, @@ -85,7 +85,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); @@ -95,10 +95,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); - unsigned NumBytes = MFI->getStackSize(); + unsigned NumBytes = MFI.getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -110,7 +110,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; - MFI->setStackSize(NumBytes); + MFI.setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. 
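[Editor's note] emitPrologue above rounds the frame size with NumBytes = (NumBytes + 3) & ~3 because Thumb add/sub sp, #imm8 implicitly scales its immediate by 4. That is the usual power-of-two align-up idiom, demonstrated on a few values:

#include <cstdint>
#include <iostream>

// Round N up to the next multiple of 4 (a power of two), as the prologue does.
static uint32_t alignTo4(uint32_t N) { return (N + 3) & ~uint32_t(3); }

int main() {
  for (uint32_t N : {0u, 1u, 4u, 5u, 7u, 8u})
    std::cout << N << " -> " << alignTo4(N) << "\n";  // 0 4 4 8 8 8
}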
@@ -121,7 +121,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                  MachineInstr::FrameSetup);
     CFAOffset -= ArgRegsSaveSize;
-    unsigned CFIIndex = MMI.addFrameInst(
+    unsigned CFIIndex = MF.addFrameInst(
         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
@@ -133,7 +133,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
                  MachineInstr::FrameSetup);
     CFAOffset -= NumBytes - ArgRegsSaveSize;
-    unsigned CFIIndex = MMI.addFrameInst(
+    unsigned CFIIndex = MF.addFrameInst(
         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
@@ -150,11 +150,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
     case ARM::R9:
     case ARM::R10:
     case ARM::R11:
-      if (STI.splitFramePushPop()) {
+      if (STI.splitFramePushPop(MF)) {
         GPRCS2Size += 4;
         break;
       }
-      // fallthrough
+      LLVM_FALLTHROUGH;
     case ARM::R4:
     case ARM::R5:
     case ARM::R6:
@@ -179,7 +179,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
   bool HasFP = hasFP(MF);
   if (HasFP)
-    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                 NumBytes);
   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
@@ -188,7 +188,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   int FramePtrOffsetInBlock = 0;
   unsigned adjustedGPRCS1Size = GPRCS1Size;
-  if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+  if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
+      tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
     FramePtrOffsetInBlock = NumBytes;
     adjustedGPRCS1Size += NumBytes;
     NumBytes = 0;
@@ -196,7 +197,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,

   if (adjustedGPRCS1Size) {
     CFAOffset -= adjustedGPRCS1Size;
-    unsigned CFIIndex = MMI.addFrameInst(
+    unsigned CFIIndex = MF.addFrameInst(
         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
@@ -212,7 +213,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R10:
       case ARM::R11:
       case ARM::R12:
-        if (STI.splitFramePushPop())
+        if (STI.splitFramePushPop(MF))
           break;
         // fallthrough
       case ARM::R0:
@@ -224,8 +225,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       case ARM::R6:
      case ARM::R7:
       case ARM::LR:
-        unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
-            nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
             .addCFIIndex(CFIIndex)
             .setMIFlags(MachineInstr::FrameSetup);
@@ -236,20 +237,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   // Adjust FP so it points to the stack slot that contains the previous FP.
   if (HasFP) {
     FramePtrOffsetInBlock +=
-        MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+        MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
       .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
       .setMIFlags(MachineInstr::FrameSetup));
     if(FramePtrOffsetInBlock) {
       CFAOffset += FramePtrOffsetInBlock;
-      unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
           nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
     } else {
       unsigned CFIIndex =
-          MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
@@ -261,13 +262,55 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
       AFI->setShouldRestoreSPFromFP(true);
   }

+  // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
+  // and tMOVr instructions. We don't need to add any call frame information
+  // in-between these instructions, because they do not modify the high
+  // registers.
+  while (true) {
+    MachineBasicBlock::iterator OldMBBI = MBBI;
+    // Skip a run of tMOVr instructions
+    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
+      MBBI++;
+    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+      MBBI++;
+    } else {
+      // We have reached an instruction which is not a push, so the previous
+      // run of tMOVr instructions (which may have been empty) was not part of
+      // the prologue. Reset MBBI back to the last PUSH of the prologue.
+      MBBI = OldMBBI;
+      break;
+    }
+  }
+
+  // Emit call frame information for the callee-saved high registers.
+  for (auto &I : CSI) {
+    unsigned Reg = I.getReg();
+    int FI = I.getFrameIdx();
+    switch (Reg) {
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+    case ARM::R12: {
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
   if (NumBytes) {
     // Insert it after all the callee-save spills.
     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
                  MachineInstr::FrameSetup);
     if (!HasFP) {
       CFAOffset -= NumBytes;
-      unsigned CFIIndex = MMI.addFrameInst(
+      unsigned CFIIndex = MF.addFrameInst(
           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
@@ -276,8 +319,8 @@
   }

   if (STI.isTargetELF() && HasFP)
-    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
-                             AFI->getFramePtrSpillOffset());
+    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
+                            AFI->getFramePtrSpillOffset());

   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -299,7 +342,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   // If the frame has variable sized objects then the epilogue must restore
   // the sp from fp. We can assume there's an FP here since hasFP already
   // checks for hasVarSizedObjects.
-  if (MFI->hasVarSizedObjects())
+  if (MFI.hasVarSizedObjects())
     AFI->setShouldRestoreSPFromFP(true);
 }

@@ -308,12 +351,12 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
       isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
     return true;
   else if (MI.getOpcode() == ARM::tPOP) {
-    // The first two operands are predicates. The last two are
-    // imp-def and imp-use of SP. Check everything in between.
-    for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i)
-      if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
-        return false;
     return true;
+  } else if (MI.getOpcode() == ARM::tMOVr) {
+    unsigned Dst = MI.getOperand(0).getReg();
+    unsigned Src = MI.getOperand(1).getReg();
+    return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
+            ARM::hGPRRegClass.contains(Dst));
   }
   return false;
 }
@@ -322,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ThumbRegisterInfo *RegInfo =
       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
@@ -330,7 +373,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
-  int NumBytes = (int)MFI->getStackSize();
+  int NumBytes = (int)MFI.getStackSize();
   assert((unsigned)NumBytes >= ArgRegsSaveSize &&
          "ArgRegsSaveSize is included in NumBytes");
   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -361,7 +404,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
       // frame pointer stack slot, the target is ELF and the function has FP, or
       // the target uses var sized objects.
       if (NumBytes) {
-        assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+        assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                "No scratch register to restore SP from FP!");
         emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                   TII, *RegInfo);
@@ -405,7 +448,7 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
     return true;

   // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
-  for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
+  for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
     if (CSI.getReg() == ARM::LR)
       return true;

@@ -568,6 +611,19 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
   return true;
 }

+// Return the first iterator after CurrentReg which is present in EnabledRegs,
+// or OrderEnd if no further registers are in that set. This does not advance
+// the iterator first, so returns CurrentReg if it is in EnabledRegs.
+template <unsigned SetSize>
+static const unsigned *
+findNextOrderedReg(const unsigned *CurrentReg,
+                   SmallSet<unsigned, SetSize> &EnabledRegs,
+                   const unsigned *OrderEnd) {
+  while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
+    ++CurrentReg;
+  return CurrentReg;
+}
+
 bool Thumb1FrameLowering::
 spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
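The findNextOrderedReg helper added above scans a fixed-priority register array and stops at the first entry that is also in the given set. A small self-contained usage sketch, with std::set standing in for llvm::SmallSet and plain integers standing in for ARM register ids (both assumptions, for illustration only):

    #include <iostream>
    #include <iterator>
    #include <set>

    // Same idea as the patch's helper: walk a fixed-order array, skipping
    // entries that are not in EnabledRegs.
    static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
                                              const std::set<unsigned> &EnabledRegs,
                                              const unsigned *OrderEnd) {
      while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
        ++CurrentReg;
      return CurrentReg;
    }

    int main() {
      static const unsigned Order[] = {8, 9, 10, 11}; // stand-ins for r8-r11
      std::set<unsigned> ToSave = {9, 11};            // only r9 and r11 need saving

      // Visits r9, then r11, in the order fixed by the array.
      for (const unsigned *R =
               findNextOrderedReg(std::begin(Order), ToSave, std::end(Order));
           R != std::end(Order);
           R = findNextOrderedReg(R + 1, ToSave, std::end(Order)))
        std::cout << "save r" << *R << "\n";
    }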
@@ -578,29 +634,114 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   DebugLoc DL;
   const TargetInstrInfo &TII = *STI.getInstrInfo();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+
+  SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr
+  SmallSet<unsigned, 4> HiRegsToSave; // r8-r11
+  SmallSet<unsigned, 9> CopyRegs;     // Registers which can be used after pushing
+                                      // LoRegs for saving HiRegs.

-  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
-  AddDefaultPred(MIB);
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    bool isKill = true;

-    // Add the callee-saved register as live-in unless it's LR and
-    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
-    // then it's already added to the function and entry block live-in sets.
-    if (Reg == ARM::LR) {
-      MachineFunction &MF = *MBB.getParent();
-      if (MF.getFrameInfo()->isReturnAddressTaken() &&
-          MF.getRegInfo().isLiveIn(Reg))
-        isKill = false;
+    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+      LoRegsToSave.insert(Reg);
+    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+      HiRegsToSave.insert(Reg);
+    } else {
+      llvm_unreachable("callee-saved register of unexpected class");
+    }
+
+    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+        !MF.getRegInfo().isLiveIn(Reg) &&
+        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+      CopyRegs.insert(Reg);
+  }
+
+  // Unused argument registers can be used for the high register saving.
+  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+    if (!MF.getRegInfo().isLiveIn(ArgReg))
+      CopyRegs.insert(ArgReg);
+
+  // Push the low registers and lr
+  if (!LoRegsToSave.empty()) {
+    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+    AddDefaultPred(MIB);
+    for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
+      if (LoRegsToSave.count(Reg)) {
+        bool isKill = !MF.getRegInfo().isLiveIn(Reg);
+        if (isKill)
+          MBB.addLiveIn(Reg);
+
+        MIB.addReg(Reg, getKillRegState(isKill));
+      }
+    }
+    MIB.setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  // Push the high registers. There are no store instructions that can access
+  // these registers directly, so we have to move them to low registers, and
+  // push them. This might take multiple pushes, as it is possible for there to
+  // be fewer low registers available than high registers which need saving.
+
+  // These are in reverse order so that in the case where we need to use
+  // multiple PUSH instructions, the order of the registers on the stack still
+  // matches the unwind info. They need to be switched back to ascending order
+  // before adding to the PUSH instruction.
+  static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
+                                         ARM::R5, ARM::R4, ARM::R3,
+                                         ARM::R2, ARM::R1, ARM::R0};
+  static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
+
+  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+  // Find the first register to save.
+  const unsigned *HiRegToSave = findNextOrderedReg(
+      std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
+
+  while (HiRegToSave != AllHighRegsEnd) {
+    // Find the first low register to use.
+    const unsigned *CopyReg =
+        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+    // Create the PUSH, but don't insert it yet (the MOVs need to come first).
+    MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
+    AddDefaultPred(PushMIB);
+
+    SmallVector<unsigned, 4> RegsToPush;
+    while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+      if (HiRegsToSave.count(*HiRegToSave)) {
+        bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
+        if (isKill)
+          MBB.addLiveIn(*HiRegToSave);
+
+        // Emit a MOV from the high reg to the low reg.
+        MachineInstrBuilder MIB =
+            BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+        MIB.addReg(*CopyReg, RegState::Define);
+        MIB.addReg(*HiRegToSave, getKillRegState(isKill));
+        AddDefaultPred(MIB);
+
+        // Record the register that must be added to the PUSH.
+        RegsToPush.push_back(*CopyReg);
+
+        CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+        HiRegToSave =
+            findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+      }
     }

-    if (isKill)
-      MBB.addLiveIn(Reg);
+    // Add the low registers to the PUSH, in ascending order.
+    for (unsigned Reg : reverse(RegsToPush))
+      PushMIB.addReg(Reg, RegState::Kill);

-    MIB.addReg(Reg, getKillRegState(isKill));
+    // Insert the PUSH instruction after the MOVs.
+    MBB.insert(MI, PushMIB);
   }
-  MIB.setMIFlags(MachineInstr::FrameSetup);
+
   return true;
 }

@@ -615,15 +756,101 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetInstrInfo &TII = *STI.getInstrInfo();
+  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());

   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+
+  SmallSet<unsigned, 9> LoRegsToRestore;
+  SmallSet<unsigned, 4> HiRegsToRestore;
+  // Low registers (r0-r7) which can be used to restore the high registers.
+  SmallSet<unsigned, 9> CopyRegs;
+
+  for (CalleeSavedInfo I : CSI) {
+    unsigned Reg = I.getReg();
+
+    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+      LoRegsToRestore.insert(Reg);
+    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+      HiRegsToRestore.insert(Reg);
+    } else {
+      llvm_unreachable("callee-saved register of unexpected class");
+    }
+
+    // If this is a low register not used as the frame pointer, we may want to
+    // use it for restoring the high registers.
+    if ((ARM::tGPRRegClass.contains(Reg)) &&
+        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+      CopyRegs.insert(Reg);
+  }
+
+  // If this is a return block, we may be able to use some unused return value
+  // registers for restoring the high regs.
+  auto Terminator = MBB.getFirstTerminator();
+  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+    CopyRegs.insert(ARM::R0);
+    CopyRegs.insert(ARM::R1);
+    CopyRegs.insert(ARM::R2);
+    CopyRegs.insert(ARM::R3);
+    for (auto Op : Terminator->implicit_operands()) {
+      if (Op.isReg())
+        CopyRegs.erase(Op.getReg());
+    }
+  }
+
+  static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+                                         ARM::R4, ARM::R5, ARM::R6, ARM::R7};
+  static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
+
+  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+  // Find the first register to restore.
+  auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
+                                           HiRegsToRestore, AllHighRegsEnd);
+
+  while (HiRegToRestore != AllHighRegsEnd) {
+    assert(!CopyRegs.empty());
+    // Find the first low register to use.
+    auto CopyReg =
+        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+    // Create the POP instruction.
+    MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
+    AddDefaultPred(PopMIB);
+
+    while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+      // Add the low register to the POP.
+      PopMIB.addReg(*CopyReg, RegState::Define);
+
+      // Create the MOV from low to high register.
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+      MIB.addReg(*HiRegToRestore, RegState::Define);
+      MIB.addReg(*CopyReg, RegState::Kill);
+      AddDefaultPred(MIB);
+
+      CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+      HiRegToRestore =
+          findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+    }
+  }
+
+
+
   MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
   AddDefaultPred(MIB);

   bool NeedsPop = false;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+
+    // High registers (excluding lr) have already been dealt with
+    if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
+      continue;
+
     if (Reg == ARM::LR) {
       if (MBB.succ_empty()) {
         // Special epilogue for vararg functions. See emitEpilogue
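The Thumb1FrameLowering changes above implement callee-saved handling for r8-r11, which Thumb1 push/pop cannot access directly: each high register is copied to a free low register with tMOVr and the copies are pushed, possibly over several rounds when fewer low registers are free than high registers need saving; the restore path mirrors this with tPOP followed by tMOVr. A deliberately simplified sketch of the staging logic and the instruction sequence it produces (register choice is illustrative, and none of the ordering bookkeeping of the real pass is modeled beyond keeping the stack layout equal to a single push of r8-r11):

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Process high registers highest-first so that, across multiple
      // rounds, the final stack layout matches one push {r8-r11}, which is
      // what the unwind info describes.
      std::vector<std::string> HiToSave = {"r11", "r10", "r9", "r8"};
      std::vector<std::string> FreeLo = {"r5", "r4"}; // only two copy regs free

      size_t i = 0;
      while (i < HiToSave.size()) {
        std::vector<std::string> Copied;
        // Stage as many high regs as we have free low regs.
        for (size_t j = 0; j < FreeLo.size() && i < HiToSave.size(); ++j, ++i) {
          std::cout << "mov " << FreeLo[j] << ", " << HiToSave[i] << "\n";
          Copied.push_back(FreeLo[j]);
        }
        // PUSH wants ascending register order, so emit the copies reversed.
        std::cout << "push {";
        for (size_t j = Copied.size(); j-- > 0;)
          std::cout << Copied[j] << (j ? ", " : "");
        std::cout << "}\n";
      }
    }

With two free copy registers this prints two mov/mov/push rounds whose combined stack contents, from lower to higher addresses, are r8, r9, r10, r11.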
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 159731d8fc72..4b4fbaab28d9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -83,7 +83,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (I != MBB.end()) DL = I->getDebugLoc();

     MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
+    MachineFrameInfo &MFI = MF.getFrameInfo();
     MachineMemOperand *MMO = MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -109,7 +109,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (I != MBB.end()) DL = I->getDebugLoc();

     MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
+    MachineFrameInfo &MFI = MF.getFrameInfo();
     MachineMemOperand *MMO = MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));

diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 0c7055551632..d01fc8c40ddf 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -38,10 +38,10 @@ namespace {

     MachineFunctionProperties getRequiredProperties() const override {
       return MachineFunctionProperties().set(
-          MachineFunctionProperties::Property::AllVRegsAllocated);
+          MachineFunctionProperties::Property::NoVRegs);
     }

-    const char *getPassName() const override {
+    StringRef getPassName() const override {
       return "Thumb IT blocks insertion pass";
     }

diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index e2e6dafd218a..1c731d669eda 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -130,7 +130,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();

   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -170,7 +170,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));

diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c4fdb9b3147d..8208e7e24770 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -148,10 +148,10 @@ namespace {

     MachineFunctionProperties getRequiredProperties() const override {
       return MachineFunctionProperties().set(
-          MachineFunctionProperties::Property::AllVRegsAllocated);
+          MachineFunctionProperties::Property::NoVRegs);
     }

-    const char *getPassName() const override {
+    StringRef getPassName() const override {
       return "Thumb2 instruction size reduction pass";
     }

@@ -430,6 +430,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     if (!MBB.getParent()->getFunction()->optForMinSize())
       return false;

+    if (!MI->hasOneMemOperand() ||
+        (*MI->memoperands_begin())->getAlignment() < 4)
+      return false;
+
     // We're creating a completely different type of load/store - LDM from LDR.
     // For this reason we can't reuse the logic at the end of this function; we
     // have to implement the MI building here.
@@ -651,7 +655,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   case ARM::t2ADDSri: {
     if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
       return true;
-    // fallthrough
+    LLVM_FALLTHROUGH;
   }
   case ARM::t2ADDSrr:
     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
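The new early-out added to ReduceLoadStore above guards the LDR-to-LDM rewrite: LDM transfers whole words, so the transformation is only sound when the instruction has exactly one memory operand whose alignment is known to be at least 4 bytes. A sketch of the same predicate as a standalone helper, assuming the LLVM 4.0-era MachineInstr/MachineMemOperand API used elsewhere in this patch:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineMemOperand.h"

    using namespace llvm;

    // LDM/STM always access whole words, so an LDR may only become an LDM
    // when its single memory operand is provably word-aligned. Unknown or
    // under-aligned accesses must keep the plain LDR.
    static bool isSafeToFormLDM(const MachineInstr &MI) {
      if (!MI.hasOneMemOperand())
        return false; // zero or merged memoperands: alignment unknown
      const MachineMemOperand *MMO = *MI.memoperands_begin();
      return MMO->getAlignment() >= 4; // word-aligned, LDM-compatible
    }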
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 6c26c8843865..2efd63b84a2c 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -126,6 +126,7 @@ static void emitThumbRegPlusImmInReg(
     bool CanChangeCC, const TargetInstrInfo &TII,
     const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
   MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
   bool isHigh = !isARMLowRegister(DestReg) ||
                 (BaseReg != 0 && !isARMLowRegister(BaseReg));
   bool isSub = false;
@@ -154,6 +155,9 @@ static void emitThumbRegPlusImmInReg(
     AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
         .addReg(LdReg, RegState::Kill)
         .setMIFlags(MIFlags);
+  } else if (ST.genExecuteOnly()) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+        .addImm(NumBytes).setMIFlags(MIFlags);
   } else
     MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
                           MIFlags);
@@ -511,10 +515,10 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   unsigned FrameReg = ARM::SP;
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MF.getFrameInfo()->getStackSize() + SPAdj;
+  int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+               MF.getFrameInfo().getStackSize() + SPAdj;

-  if (MF.getFrameInfo()->hasVarSizedObjects()) {
+  if (MF.getFrameInfo().hasVarSizedObjects()) {
     assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
     // There are alloca()'s in this function, must reference off the frame
     // pointer or base pointer instead.
@@ -534,7 +538,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
              "Cannot use SP to access the emergency spill slot in "
              "functions without a reserved call frame");
-      assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+      assert(!MF.getFrameInfo().hasVarSizedObjects() &&
              "Cannot use SP to access the emergency spill slot in "
             "functions with variable sized frame objects");
     }
@@ -570,7 +574,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     unsigned TmpReg = MI.getOperand(0).getReg();
     bool UseRR = false;
     if (Opcode == ARM::tLDRspi) {
-      if (FrameReg == ARM::SP)
+      if (FrameReg == ARM::SP || STI.genExecuteOnly())
         emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg, Offset, false,
                                  TII, *this);
       else {
@@ -594,7 +598,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     bool UseRR = false;

     if (Opcode == ARM::tSTRspi) {
-      if (FrameReg == ARM::SP)
+      if (FrameReg == ARM::SP || STI.genExecuteOnly())
        emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg, Offset, false,
                                 TII, *this);
      else {
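The genExecuteOnly() branches in ThumbRegisterInfo.cpp exist because execute-only sections may be fetched as instructions but not read as data, so the usual literal-pool load is illegal there; the constant is instead materialized in registers via t2MOVi32imm, a pseudo that expands to a movw/movt pair. A small arithmetic sketch of that split (illustrative only, no LLVM API involved):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Example offset to materialize without reading from the text section.
      uint32_t NumBytes = 0x12345678;
      uint16_t Lo = NumBytes & 0xffffu;         // movw rN, #Lo (low half)
      uint16_t Hi = (NumBytes >> 16) & 0xffffu; // movt rN, #Hi (high half)
      std::printf("movw rN, #0x%04x\n", Lo);
      std::printf("movt rN, #0x%04x\n", Hi);
      // Reassemble to check: (Hi << 16) | Lo recovers the constant.
      std::printf("reassembled: 0x%08x\n", ((uint32_t)Hi << 16) | Lo);
    }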