author    Dimitry Andric <dim@FreeBSD.org>    2017-12-20 14:16:56 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2017-12-20 14:16:56 +0000
commit    2cab237b5dbfe1b3e9c7aa7a3c02d2b98fcf7462 (patch)
tree      524fe828571f81358bba62fdb6d04c6e5e96a2a4 /contrib/llvm/lib/Target/AArch64
parent    6c7828a2807ea5e50c79ca42dbedf2b589ce63b2 (diff)
parent    044eb2f6afba375a914ac9d8024f8f5142bb912e (diff)
Merge llvm trunk r321017 to contrib/llvm.
Notes: svn path=/projects/clang600-import/; revision=327023
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64')
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64.h | 4
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64.td | 80
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 24
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 3
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 83
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp | 47
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h | 20
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td | 3
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 3
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp | 9
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 25
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp | 26
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 118
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 62
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 67
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp | 9
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 48
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h | 4
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def | 113
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 628
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h | 29
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td | 102
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td | 633
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 1057
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 165
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td | 179
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 304
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 235
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h | 3
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 304
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp | 22
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 10
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 7
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 10
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp | 23
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp | 233
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 127
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h | 27
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 20
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 191
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp | 741
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 23
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedCyclone.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedKryo.td | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td | 290
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td | 14
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td | 139
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp | 4
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 82
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h | 38
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td | 18
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 66
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h | 19
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h | 2
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 26
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp | 388
-rw-r--r-- contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 469
-rw-r--r-- contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 278
-rw-r--r-- contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 31
-rw-r--r-- contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 6
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 101
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 14
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 26
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 6
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 19
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 8
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 1
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 71
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 22
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 8
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp | 29
-rw-r--r-- contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h | 9
-rw-r--r-- contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td | 103
-rw-r--r-- contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 6
-rw-r--r-- contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 7
85 files changed, 5822 insertions, 2338 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index 1dda746a6be1..edda13ce97ef 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -39,7 +39,7 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
-FunctionPass *createAArch64VectorByElementOptPass();
+FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64A57FPLoadBalancing();
@@ -64,7 +64,7 @@ void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
-void initializeAArch64VectorByElementOptPass(PassRegistry&);
+void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index 436bf1193304..75fb937de9bf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+ "HasZeroCycleZeroingFPWorkaround", "true",
+ "The zero-cycle floating-point zeroing instruction has a bug">;
+
def FeatureStrictAlign : SubtargetFeature<"strict-align",
"StrictAlign", "true",
"Disallow all unaligned memory "
@@ -94,6 +100,9 @@ def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
"Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+ "true", "STR of Q register with register offset is slow">;
+
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
@@ -118,10 +127,17 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
+ "Enable support for RCPC extension">;
+
def FeatureUseRSqrt : SubtargetFeature<
"use-reciprocal-square-root", "UseRSqrt", "true",
"Use the reciprocal square root approximation">;
+def FeatureDotProd : SubtargetFeature<
+ "dotprod", "HasDotProd", "true",
+ "Enable dot product support">;
+
def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
"Convert immediates and instructions "
@@ -132,6 +148,7 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
def FeatureLSLFast : SubtargetFeature<
"lsl-fast", "HasLSLFast", "true",
"CPU has a fastpath logical shift of up to 3 places">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -142,6 +159,9 @@ def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
+def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -200,6 +220,19 @@ def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
FeatureUseAA
]>;
+def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
+ "Cortex-A55 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureFullFP16,
+ FeatureDotProd,
+ FeatureRCPC,
+ FeaturePerfMon
+ ]>;
+
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
FeatureBalanceFPOps,
@@ -235,19 +268,36 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
FeaturePerfMon
]>;
+def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
+ "Cortex-A75 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureFullFP16,
+ FeatureDotProd,
+ FeatureRCPC,
+ FeaturePerfMon
+ ]>;
+
+// Note that cyclone does not fuse AES instructions, but newer apple chips do
+// perform the fusion and cyclone is used by default when targetting apple OSes.
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
"Cyclone", [
FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeatureSlowMisaligned128Store,
FeatureZCRegMove,
- FeatureZCZeroing
+ FeatureZCZeroing,
+ FeatureZCZeroingFPWorkaround
]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
@@ -305,9 +355,24 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeaturePredictableSelectIsExpensive,
FeatureRDM,
FeatureZCZeroing,
- FeatureLSLFast
+ FeatureLSLFast,
+ FeatureSlowSTRQro
]>;
+def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
+ "Qualcomm Saphira processors", [
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureSPE,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroing,
+ FeatureLSLFast,
+ HasV8_3aOps]>;
+
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
"ThunderX2T99",
"Cavium ThunderX2 processors", [
@@ -372,18 +437,21 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeaturePostRAScheduler
]>;
-// FIXME: Cortex-A35 is currently modeled as a Cortex-A53.
+// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53.
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
+def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72 and Cortex-A73 are currently modeled as a Cortex-A57.
+// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
+def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
+def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
// Cavium ThunderX/ThunderX T8X Processors
def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index e6afb42440a7..7de5d0ef66b1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index db1fbe069f4d..38a7e331bb97 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -161,9 +161,9 @@ namespace {
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
-/// fmul d0<def>, ?
-/// fmla d1<def>, ?, ?, d0<kill>
-/// fmla d2<def>, ?, ?, d1<kill>
+/// fmul def d0, ?
+/// fmla def d1, ?, ?, killed d0
+/// fmla def d2, ?, ?, killed d1
///
/// There may be other instructions interleaved in the sequence that
/// do not belong to the chain. These other instructions must not use
@@ -308,7 +308,7 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
- if (skipFunction(*F.getFunction()))
+ if (skipFunction(F.getFunction()))
return false;
if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
@@ -538,7 +538,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
return false;
}
- DEBUG(dbgs() << " - Scavenged register: " << TRI->getName(Reg) << "\n");
+ DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
std::map<unsigned, unsigned> Substs;
for (MachineInstr &I : *G) {
@@ -611,8 +611,8 @@ void AArch64A57FPLoadBalancing::scanInstruction(
// unit.
unsigned DestReg = MI->getOperand(0).getReg();
- DEBUG(dbgs() << "New chain started for register "
- << TRI->getName(DestReg) << " at " << *MI);
+ DEBUG(dbgs() << "New chain started for register " << printReg(DestReg, TRI)
+ << " at " << *MI);
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
@@ -632,7 +632,7 @@ void AArch64A57FPLoadBalancing::scanInstruction(
if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
DEBUG(dbgs() << "Chain found for accumulator register "
- << TRI->getName(AccumReg) << " in MI " << *MI);
+ << printReg(AccumReg, TRI) << " in MI " << *MI);
// For simplicity we only chain together sequences of MULs/MLAs where the
// accumulator register is killed on each instruction. This means we don't
@@ -657,7 +657,7 @@ void AArch64A57FPLoadBalancing::scanInstruction(
}
DEBUG(dbgs() << "Creating new chain for dest register "
- << TRI->getName(DestReg) << "\n");
+ << printReg(DestReg, TRI) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
AllChains.push_back(std::move(G));
@@ -685,8 +685,8 @@ maybeKillChain(MachineOperand &MO, unsigned Idx,
// If this is a KILL of a current chain, record it.
if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
- DEBUG(dbgs() << "Kill seen for chain " << TRI->getName(MO.getReg())
- << "\n");
+ DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
+ << "\n");
ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
}
ActiveChains.erase(MO.getReg());
@@ -697,7 +697,7 @@ maybeKillChain(MachineOperand &MO, unsigned Idx,
I != E;) {
if (MO.clobbersPhysReg(I->first)) {
DEBUG(dbgs() << "Kill (regmask) seen for chain "
- << TRI->getName(I->first) << "\n");
+ << printReg(I->first, TRI) << "\n");
I->second->setKill(MI, Idx, /*Immutable=*/true);
ActiveChains.erase(I++);
} else
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index bc2320dd20b3..338daecb49e5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -36,7 +36,6 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -394,7 +393,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
- if (skipFunction(*mf.getFunction()))
+ if (skipFunction(mf.getFunction()))
return false;
MRI = &mf.getRegInfo();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5ce57926cc03..67138f41dda8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===//
+//===- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,32 +17,42 @@
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
+#include "AArch64TargetObjectFile.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCLinkerOptimizationHint.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <memory>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -57,7 +67,7 @@ class AArch64AsmPrinter : public AsmPrinter {
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this), AArch64FI(nullptr) {}
+ SM(*this) {}
StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
@@ -118,7 +128,8 @@ private:
MCSymbol *GetCPISymbol(unsigned CPID) const override;
void EmitEndOfAsmFile(Module &M) override;
- AArch64FunctionInfo *AArch64FI;
+
+ AArch64FunctionInfo *AArch64FI = nullptr;
/// \brief Emit the LOHs contained in AArch64FI.
void EmitLOHs();
@@ -126,13 +137,12 @@ private:
/// Emit instruction to set float register to zero.
void EmitFMov0(const MachineInstr &MI);
- typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
+ using MInstToMCSymbol = std::map<const MachineInstr *, MCSymbol *>;
+
MInstToMCSymbol LOHInstToLabel;
};
-} // end of anonymous namespace
-
-//===----------------------------------------------------------------------===//
+} // end anonymous namespace
void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
{
@@ -200,6 +210,29 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
+
+ if (TT.isOSBinFormatCOFF()) {
+ const auto &TLOF =
+ static_cast<const TargetLoweringObjectFileCOFF &>(getObjFileLowering());
+
+ std::string Flags;
+ raw_string_ostream OS(Flags);
+
+ for (const auto &Function : M)
+ TLOF.emitLinkerFlagsForGlobal(OS, &Function);
+ for (const auto &Global : M.globals())
+ TLOF.emitLinkerFlagsForGlobal(OS, &Global);
+ for (const auto &Alias : M.aliases())
+ TLOF.emitLinkerFlagsForGlobal(OS, &Alias);
+
+ OS.flush();
+
+ // Output collected flags
+ if (!Flags.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDrectveSection());
+ OutStreamer->EmitBytes(Flags);
+ }
+ }
}
void AArch64AsmPrinter::EmitLOHs() {
@@ -490,11 +523,13 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroing()) {
- // Convert S/D register to corresponding Q register
- if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
+ if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
+ // Convert H/S/D register to corresponding Q register
+ if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
+ DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
+ else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
- } else {
+ else {
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
}
@@ -507,6 +542,11 @@ void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
MCInst FMov;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
+ case AArch64::FMOVH0:
+ FMov.setOpcode(AArch64::FMOVWHr);
+ FMov.addOperand(MCOperand::createReg(DestReg));
+ FMov.addOperand(MCOperand::createReg(AArch64::WZR));
+ break;
case AArch64::FMOVS0:
FMov.setOpcode(AArch64::FMOVWSr);
FMov.addOperand(MCOperand::createReg(DestReg));
@@ -626,6 +666,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
EmitFMov0(*MI);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 29f6d571d6bd..08152c0d83d9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -32,14 +32,14 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -47,13 +47,10 @@
using namespace llvm;
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-#error "This shouldn't be built without GISel"
-#endif
-
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {}
+namespace {
struct IncomingArgHandler : public CallLowering::ValueHandler {
IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
@@ -73,8 +70,18 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
- MIRBuilder.buildCopy(ValVReg, PhysReg);
- // FIXME: assert extension
+ switch (VA.getLocInfo()) {
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ }
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
@@ -171,10 +178,11 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
CCAssignFn *AssignFnVarArg;
uint64_t StackSize;
};
+} // namespace
void AArch64CallLowering::splitToValueTypes(
const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI,
+ const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
const SplitArgTy &PerformArgSplit) const {
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -192,14 +200,19 @@ void AArch64CallLowering::splitToValueTypes(
}
unsigned FirstRegIdx = SplitArgs.size();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ OrigArg.Ty, CallConv, false);
for (auto SplitVT : SplitVTs) {
- // FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
SplitArgs.push_back(
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+ if (NeedsRegBlock)
+ SplitArgs.back().Flags.setInConsecutiveRegs();
}
+ SplitArgs.back().Flags.setInConsecutiveRegsLast();
+
for (unsigned i = 0; i < Offsets.size(); ++i)
PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
@@ -207,7 +220,7 @@ void AArch64CallLowering::splitToValueTypes(
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
@@ -222,7 +235,7 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
[&](unsigned Reg, uint64_t Offset) {
MIRBuilder.buildExtract(Reg, VReg, Offset);
});
@@ -246,13 +259,15 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
unsigned i = 0;
for (auto &Arg : F.args()) {
+ if (DL.getTypeStoreSize(Arg.getType()) == 0)
+ continue;
ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
bool Split = false;
LLT Ty = MRI.getType(VRegs[i]);
unsigned Dst = VRegs[i];
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
[&](unsigned Reg, uint64_t Offset) {
if (!Split) {
Split = true;
@@ -307,13 +322,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const {
MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = F.getParent()->getDataLayout();
SmallVector<ArgInfo, 8> SplitArgs;
for (auto &OrigArg : OrigArgs) {
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
[&](unsigned Reg, uint64_t Offset) {
MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
});
@@ -366,7 +381,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
- splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
+ splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv(),
[&](unsigned Reg, uint64_t Offset) {
RegOffsets.push_back(Offset);
SplitRegs.push_back(Reg);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index d96ce95c4de0..68c127fc42e5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,4 +1,4 @@
-//===--- AArch64CallLowering.h - Call lowering ------------------*- C++ -*-===//
+//===- AArch64CallLowering.h - Call lowering --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,12 +17,18 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/CallingConv.h"
#include <cstdint>
#include <functional>
namespace llvm {
class AArch64TargetLowering;
+class CCValAssign;
+class DataLayout;
+class MachineIRBuilder;
+class MachineRegisterInfo;
+class Type;
class AArch64CallLowering: public CallLowering {
public:
@@ -39,18 +45,18 @@ public:
ArrayRef<ArgInfo> OrigArgs) const override;
private:
- typedef std::function<void(MachineIRBuilder &, Type *, unsigned,
- CCValAssign &)>
- RegHandler;
+ using RegHandler = std::function<void(MachineIRBuilder &, Type *, unsigned,
+ CCValAssign &)>;
- typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
- MemHandler;
+ using MemHandler =
+ std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
- typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
+ using SplitArgTy = std::function<void(unsigned, uint64_t)>;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
+ CallingConv::ID CallConv,
const SplitArgTy &SplitArg) const;
};
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index bc44bc5f2461..461c01318d4e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -19,8 +19,8 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetInstrInfo.h"
namespace {
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 291bc5ea858e..93a68449de8d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -49,6 +49,9 @@ def CC_AArch64_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index b3b738584b40..b88fba4452a1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -25,7 +25,6 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
-#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,7 +42,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 17aafa0c3d6e..0a9167edcdb3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -101,23 +101,19 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
-#include "AArch64Subtarget.h"
-#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
@@ -486,7 +482,7 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
}
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 51700f905979..30cefbad884c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -32,13 +32,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -291,7 +290,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
}
bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 2dfcd2d1c393..d14bde33d94e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -60,20 +60,26 @@
#include "AArch64.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
#include <cstdlib>
#include <tuple>
@@ -84,6 +90,7 @@ using namespace llvm;
STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
namespace {
+
class AArch64ConditionOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineDominatorTree *DomTree;
@@ -92,12 +99,14 @@ class AArch64ConditionOptimizer : public MachineFunctionPass {
public:
// Stores immediate, compare instruction opcode and branch condition (in this
// order) of adjusted comparison.
- typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo;
+ using CmpInfo = std::tuple<int, unsigned, AArch64CC::CondCode>;
static char ID;
+
AArch64ConditionOptimizer() : MachineFunctionPass(ID) {
initializeAArch64ConditionOptimizerPass(*PassRegistry::getPassRegistry());
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override;
MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
@@ -105,10 +114,12 @@ public:
bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
int ToImm);
bool runOnMachineFunction(MachineFunction &MF) override;
+
StringRef getPassName() const override {
return "AArch64 Condition Optimizer";
}
};
+
} // end anonymous namespace
char AArch64ConditionOptimizer::ID = 0;
@@ -196,7 +207,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
return nullptr;
}
}
- DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n');
return nullptr;
}
@@ -316,7 +327,7 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TII = MF.getSubtarget().getInstrInfo();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 9eda56c825a9..b0bda7c43c15 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -31,12 +31,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -369,7 +369,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
return nullptr;
}
}
- DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n');
return nullptr;
}
@@ -383,7 +383,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
// Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
// get right.
if (!MBB->livein_empty()) {
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
return false;
}
@@ -396,7 +396,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
<< BlockInstrLimit << " instructions.\n");
return false;
}
@@ -458,8 +458,9 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
return false;
// The CFG topology checks out.
- DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
- << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
+ DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*CmpBB) << " -> "
+ << printMBBReference(*Tail) << '\n');
++NumConsidered;
// Tail is allowed to have many predecessors, but we can't handle PHIs yet.
@@ -562,8 +563,9 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
}
void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
- DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
- << Head->getNumber() << ":\n" << *CmpBB);
+ DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
+ << printMBBReference(*Head) << ":\n"
+ << *CmpBB);
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
@@ -922,7 +924,7 @@ bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TII = MF.getSubtarget().getInstrInfo();
@@ -934,7 +936,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->optForMinSize();
+ MinSize = MF.getFunction().optForMinSize();
bool Changed = false;
CmpConv.runOnMachineFunction(MF, MBPI);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index b72f23b109d9..8e7e740da6f6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
@@ -55,6 +55,8 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
};
char AArch64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
@@ -69,6 +71,63 @@ static bool usesFrameIndex(const MachineInstr &MI) {
return false;
}
+bool
+AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
+ return false;
+
+#define CASE_AARCH64_ATOMIC_(PREFIX) \
+ case AArch64::PREFIX##X: \
+ case AArch64::PREFIX##W: \
+ case AArch64::PREFIX##H: \
+ case AArch64::PREFIX##B
+
+ for (const MachineMemOperand *MMO : MI.memoperands()) {
+ if (MMO->isAtomic()) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ break;
+
+ CASE_AARCH64_ATOMIC_(LDADDA):
+ CASE_AARCH64_ATOMIC_(LDADDAL):
+
+ CASE_AARCH64_ATOMIC_(LDCLRA):
+ CASE_AARCH64_ATOMIC_(LDCLRAL):
+
+ CASE_AARCH64_ATOMIC_(LDEORA):
+ CASE_AARCH64_ATOMIC_(LDEORAL):
+
+ CASE_AARCH64_ATOMIC_(LDSETA):
+ CASE_AARCH64_ATOMIC_(LDSETAL):
+
+ CASE_AARCH64_ATOMIC_(LDSMAXA):
+ CASE_AARCH64_ATOMIC_(LDSMAXAL):
+
+ CASE_AARCH64_ATOMIC_(LDSMINA):
+ CASE_AARCH64_ATOMIC_(LDSMINAL):
+
+ CASE_AARCH64_ATOMIC_(LDUMAXA):
+ CASE_AARCH64_ATOMIC_(LDUMAXAL):
+
+ CASE_AARCH64_ATOMIC_(LDUMINA):
+ CASE_AARCH64_ATOMIC_(LDUMINAL):
+
+ CASE_AARCH64_ATOMIC_(SWPA):
+ CASE_AARCH64_ATOMIC_(SWPAL):
+ return true;
+ break;
+ }
+ }
+ }
+
+#undef CASE_AARCH64_ATOMIC_
+
+ return false;
+}
+
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
@@ -86,55 +145,12 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
continue;
}
- if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
- // XZ/WZ for LSE can only be used when acquire semantics are not used,
- // LDOPAL WZ is an invalid opcode.
- switch (MI.getOpcode()) {
- case AArch64::CASALb:
- case AArch64::CASALh:
- case AArch64::CASALs:
- case AArch64::CASALd:
- case AArch64::SWPALb:
- case AArch64::SWPALh:
- case AArch64::SWPALs:
- case AArch64::SWPALd:
- case AArch64::LDADDALb:
- case AArch64::LDADDALh:
- case AArch64::LDADDALs:
- case AArch64::LDADDALd:
- case AArch64::LDCLRALb:
- case AArch64::LDCLRALh:
- case AArch64::LDCLRALs:
- case AArch64::LDCLRALd:
- case AArch64::LDEORALb:
- case AArch64::LDEORALh:
- case AArch64::LDEORALs:
- case AArch64::LDEORALd:
- case AArch64::LDSETALb:
- case AArch64::LDSETALh:
- case AArch64::LDSETALs:
- case AArch64::LDSETALd:
- case AArch64::LDSMINALb:
- case AArch64::LDSMINALh:
- case AArch64::LDSMINALs:
- case AArch64::LDSMINALd:
- case AArch64::LDSMAXALb:
- case AArch64::LDSMAXALh:
- case AArch64::LDSMAXALs:
- case AArch64::LDSMAXALd:
- case AArch64::LDUMINALb:
- case AArch64::LDUMINALh:
- case AArch64::LDUMINALs:
- case AArch64::LDUMINALd:
- case AArch64::LDUMAXALb:
- case AArch64::LDUMAXALh:
- case AArch64::LDUMAXALs:
- case AArch64::LDUMAXALd:
- continue;
- default:
- break;
- }
+
+ if (shouldSkip(MI, MF)) {
+ DEBUG(dbgs() << " Ignoring, Atomic instruction with acquire semantics using WZR/XZR\n");
+ continue;
}
+
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
@@ -182,7 +198,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TRI = MF.getSubtarget().getRegisterInfo();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index d52cd84246a1..c3842785f2be 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1,4 +1,4 @@
-//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
+//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,24 +18,44 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <utility>
+
using namespace llvm;
#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {
+
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
+ const AArch64InstrInfo *TII;
+
static char ID;
+
AArch64ExpandPseudo() : MachineFunctionPass(ID) {
initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
}
- const AArch64InstrInfo *TII;
-
bool runOnMachineFunction(MachineFunction &Fn) override;
StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
@@ -55,8 +75,10 @@ private:
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
};
+
+} // end anonymous namespace
+
char AArch64ExpandPseudo::ID = 0;
-}
INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
AARCH64_EXPAND_PSEUDO_NAME, false, false)
@@ -151,12 +173,12 @@ static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
-///
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const AArch64InstrInfo *TII) {
- typedef DenseMap<uint64_t, unsigned> CountMap;
+ using CountMap = DenseMap<uint64_t, unsigned>;
+
CountMap Counts;
// Scan the constant and count how often every chunk occurs.
@@ -242,7 +264,7 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
static bool isStartChunk(uint64_t Chunk) {
- if (Chunk == 0 || Chunk == UINT64_MAX)
+ if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
return false;
return isMask_64(~Chunk);
@@ -252,7 +274,7 @@ static bool isStartChunk(uint64_t Chunk) {
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
static bool isEndChunk(uint64_t Chunk) {
- if (Chunk == 0 || Chunk == UINT64_MAX)
+ if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
return false;
return isMask_64(Chunk);
@@ -285,7 +307,6 @@ static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
-///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -651,16 +672,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
MI.eraseFromParent();
// Recompute livein lists.
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
- computeLiveIns(LiveRegs, MRI, *DoneBB);
- computeLiveIns(LiveRegs, MRI, *StoreBB);
- computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ computeAndAddLiveIns(LiveRegs, *DoneBB);
+ computeAndAddLiveIns(LiveRegs, *StoreBB);
+ computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass around the loop to get loop carried registers right.
StoreBB->clearLiveIns();
- computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
- computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}
@@ -668,7 +688,6 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
-
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &DestLo = MI.getOperand(0);
@@ -746,16 +765,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
MI.eraseFromParent();
// Recompute liveness bottom up.
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
- computeLiveIns(LiveRegs, MRI, *DoneBB);
- computeLiveIns(LiveRegs, MRI, *StoreBB);
- computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ computeAndAddLiveIns(LiveRegs, *DoneBB);
+ computeAndAddLiveIns(LiveRegs, *StoreBB);
+ computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass in the loop to get the loop carried dependencies right.
StoreBB->clearLiveIns();
- computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
- computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 2c887a9ca5db..d1ddb2e3ef70 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===//
+//===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,21 +15,41 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -60,6 +80,7 @@ private:
class FalkorMarkStridedAccessesLegacy : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
+
FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
initializeFalkorMarkStridedAccessesLegacyPass(
*PassRegistry::getPassRegistry());
@@ -71,16 +92,16 @@ public:
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
- // FIXME: For some reason, preserving SE here breaks LSR (even if
- // this pass changes nothing).
- // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
}
bool runOnFunction(Function &F) override;
};
-} // namespace
+
+} // end anonymous namespace
char FalkorMarkStridedAccessesLegacy::ID = 0;
+
INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
"Falkor HW Prefetch Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
@@ -165,7 +186,7 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -186,17 +207,16 @@ private:
/// Bits from load opcodes used to compute HW prefetcher instruction tags.
struct LoadInfo {
- LoadInfo()
- : DestReg(0), BaseReg(0), BaseRegIdx(-1), OffsetOpnd(nullptr),
- IsPrePost(false) {}
- unsigned DestReg;
- unsigned BaseReg;
- int BaseRegIdx;
- const MachineOperand *OffsetOpnd;
- bool IsPrePost;
+ LoadInfo() = default;
+
+ unsigned DestReg = 0;
+ unsigned BaseReg = 0;
+ int BaseRegIdx = -1;
+ const MachineOperand *OffsetOpnd = nullptr;
+ bool IsPrePost = false;
};
-} // namespace
+} // end anonymous namespace
char FalkorHWPFFix::ID = 0;
@@ -618,9 +638,14 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
break;
}
+ // Loads from the stack pointer don't get prefetched.
+ unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg();
+ if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
+ return None;
+
LoadInfo LI;
LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
- LI.BaseReg = MI.getOperand(BaseRegIdx).getReg();
+ LI.BaseReg = BaseReg;
LI.BaseRegIdx = BaseRegIdx;
LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
LI.IsPrePost = IsPrePost;
@@ -715,7 +740,7 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
if (TagMap.count(NewTag))
continue;
- DEBUG(dbgs() << "Changing base reg to: " << PrintReg(ScratchReg, TRI)
+ DEBUG(dbgs() << "Changing base reg to: " << printReg(ScratchReg, TRI)
<< '\n');
// Rewrite:
@@ -735,7 +760,7 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
// well to update the real base register.
if (LdI.IsPrePost) {
DEBUG(dbgs() << "Doing post MOV of incremented reg: "
- << PrintReg(ScratchReg, TRI) << '\n');
+ << printReg(ScratchReg, TRI) << '\n');
MI.getOperand(0).setReg(
ScratchReg); // Change tied operand pre/post update dest.
BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
@@ -773,7 +798,7 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
if (ST.getProcFamily() != AArch64Subtarget::Falkor)
return false;
- if (skipFunction(*Fn.getFunction()))
+ if (skipFunction(Fn.getFunction()))
return false;
TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
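
A minimal sketch of the API change most of the mechanical edits in this merge adapt to: MachineFunction::getFunction() now returns a const Function& rather than a const Function*, so "->" becomes "." at the call sites. The helper below is illustrative only.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

// Before: MF.getFunction()->getCallingConv() == CallingConv::GHC
// After:  member access through the returned reference.
static bool usesGHCCallingConv(const llvm::MachineFunction &MF) {
  return MF.getFunction().getCallingConv() == llvm::CallingConv::GHC;
}
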
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 97396057dce0..fd1699fd363d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1,4 +1,4 @@
-//===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
+//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -53,6 +53,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -63,6 +64,7 @@
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@@ -78,10 +80,10 @@ namespace {
class AArch64FastISel final : public FastISel {
class Address {
public:
- typedef enum {
+ using BaseKind = enum {
RegBase,
FrameIndexBase
- } BaseKind;
+ };
private:
BaseKind Kind = RegBase;
@@ -944,7 +946,6 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
return false;
}
-
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(DL, Ty, true);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7c6a99990406..73944359223a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -110,6 +110,9 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -121,11 +124,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -155,8 +155,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
MI.getOpcode() == AArch64::ADDSXri)
continue;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isFI())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
continue;
int Offset = 0;
@@ -174,7 +174,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
- if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -459,13 +459,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
+ const Function &F = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
+ bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
bool HasFP = hasFP(MF);
// Debug location must be unknown since the first debug location is used
@@ -474,7 +474,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
int NumBytes = (int)MFI.getStackSize();
@@ -507,7 +507,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -716,7 +716,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// Initial and residual are named for consistency with the prologue. Note that
@@ -765,7 +765,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// it as the 2nd argument of AArch64ISD::TC_RETURN.
bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -857,7 +857,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
@@ -928,7 +928,7 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- AttributeList Attrs = MF.getFunction()->getAttributes();
+ AttributeList Attrs = MF.getFunction().getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError));
@@ -959,7 +959,7 @@ static void computeCalleeSaveRegisterPairs(
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
- CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ CallingConv::ID CC = MF.getFunction().getCallingConv();
unsigned Count = CSI.size();
(void)CC;
// MachO's compact unwind format relies on all registers being stored in
@@ -1060,9 +1060,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
else
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
+ DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired())
- dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired())
dbgs() << ", " << RPI.FrameIdx+1;
@@ -1092,7 +1092,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -1123,9 +1123,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
else
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
+ DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired())
- dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired())
dbgs() << ", " << RPI.FrameIdx+1;
@@ -1154,7 +1154,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
@@ -1208,7 +1208,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg : SavedRegs.set_bits())
- dbgs() << ' ' << PrintReg(Reg, RegInfo);
+ dbgs() << ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
@@ -1233,8 +1233,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// here.
if (BigStack) {
if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
- DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
- << " to get a scratch register.\n");
+ DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
+ << " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index f254ea9b70aa..55a256867fab 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
@@ -50,7 +50,7 @@ public:
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
/// \brief Can this function use the red zone for local allocations.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 8b1c9740d2ad..37720cbd32bb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -11,26 +11,24 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-#error "You shouldn't build this"
-#endif
-
namespace llvm {
RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
- // 0: FPR 32-bit value.
+ // 0: FPR 16-bit value.
+ {0, 16, AArch64::FPRRegBank},
+ // 1: FPR 32-bit value.
{0, 32, AArch64::FPRRegBank},
- // 1: FPR 64-bit value.
+ // 2: FPR 64-bit value.
{0, 64, AArch64::FPRRegBank},
- // 2: FPR 128-bit value.
+ // 3: FPR 128-bit value.
{0, 128, AArch64::FPRRegBank},
- // 3: FPR 256-bit value.
+ // 4: FPR 256-bit value.
{0, 256, AArch64::FPRRegBank},
- // 4: FPR 512-bit value.
+ // 5: FPR 512-bit value.
{0, 512, AArch64::FPRRegBank},
- // 5: GPR 32-bit value.
+ // 6: GPR 32-bit value.
{0, 32, AArch64::GPRRegBank},
- // 6: GPR 64-bit value.
+ // 7: GPR 64-bit value.
{0, 64, AArch64::GPRRegBank},
};
@@ -41,58 +39,78 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
{nullptr, 0},
// 3-operands instructions (all binary operations should end up with one of
// those mapping).
- // 1: FPR 32-bit value. <-- This must match First3OpsIdx.
+ // 1: FPR 16-bit value. <-- This must match First3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ // 4: FPR 32-bit value. <-- This must match First3OpsIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 4: FPR 64-bit value.
+ // 7: FPR 64-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 7: FPR 128-bit value.
+ // 10: FPR 128-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
- // 10: FPR 256-bit value.
+ // 13: FPR 256-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
- // 13: FPR 512-bit value.
+ // 16: FPR 512-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
- // 16: GPR 32-bit value.
+ // 19: GPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 19: GPR 64-bit value. <-- This must match Last3OpsIdx.
+ // 22: GPR 64-bit value. <-- This must match Last3OpsIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
// Cross register bank copies.
- // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match
- // FirstCrossRegCpyIdx.
+  // 25: FPR 16-bit value to GPR 32-bit value. <-- This must match
+ // FirstCrossRegCpyIdx.
+ // Note: This is the kind of copy we see with physical registers.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 27: FPR 32-bit value to GPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 24: FPR 64-bit value to GPR 64-bit value.
+ // 29: FPR 64-bit value to GPR 64-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
- // 26: FPR 128-bit value to GPR 128-bit value (invalid)
+ // 31: FPR 128-bit value to GPR 128-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 28: FPR 256-bit value to GPR 256-bit value (invalid)
+ // 33: FPR 256-bit value to GPR 256-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 30: FPR 512-bit value to GPR 512-bit value (invalid)
+ // 35: FPR 512-bit value to GPR 512-bit value (invalid)
{nullptr, 1},
{nullptr, 1},
- // 32: GPR 32-bit value to FPR 32-bit value.
+ // 37: GPR 32-bit value to FPR 32-bit value.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match
+ // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match
// LastCrossRegCpyIdx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+  // 43: FPExt: 16 to 64. <-- This must match FPExt16To64Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1},
+ // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
};
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
@@ -149,16 +167,18 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
return -1;
}
if (RBIdx == PMI_FirstFPR) {
- if (Size <= 32)
+ if (Size <= 16)
return 0;
- if (Size <= 64)
+ if (Size <= 32)
return 1;
- if (Size <= 128)
+ if (Size <= 64)
return 2;
- if (Size <= 256)
+ if (Size <= 128)
return 3;
- if (Size <= 512)
+ if (Size <= 256)
return 4;
+ if (Size <= 512)
+ return 5;
return -1;
}
return -1;
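
Standalone restatement of the FPR size-to-offset mapping above after the 16-bit entry was added, for quick reference. The thresholds are copied from the hunk; the function name is illustrative.

// Offsets into PartMappings for the FPR bank: 16, 32, 64, 128, 256 and 512
// bits map to 0..5; anything larger is unsupported.
static int fprPartialMappingOffset(unsigned Size) {
  if (Size <= 16)  return 0;
  if (Size <= 32)  return 1;
  if (Size <= 64)  return 2;
  if (Size <= 128) return 3;
  if (Size <= 256) return 4;
  if (Size <= 512) return 5;
  return -1;
}
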
@@ -210,4 +230,35 @@ AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
+
+const RegisterBankInfo::ValueMapping *
+AArch64GenRegisterBankInfo::getFPExtMapping(unsigned DstSize,
+ unsigned SrcSize) {
+ // We support:
+ // - For Scalar:
+ // - 16 to 32.
+ // - 16 to 64.
+ // - 32 to 64.
+ // => FPR 16 to FPR 32|64
+ // => FPR 32 to FPR 64
+ // - For vectors:
+ // - v4f16 to v4f32
+ // - v2f32 to v2f64
+ // => FPR 64 to FPR 128
+
+ // Check that we have been asked sensible sizes.
+ if (SrcSize == 16) {
+ assert((DstSize == 32 || DstSize == 64) && "Unexpected half extension");
+ if (DstSize == 32)
+ return &ValMappings[FPExt16To32Idx];
+ return &ValMappings[FPExt16To64Idx];
+ }
+
+ if (SrcSize == 32) {
+ assert(DstSize == 64 && "Unexpected float extension");
+ return &ValMappings[FPExt32To64Idx];
+ }
+ assert((SrcSize == 64 || DstSize == 128) && "Unexpected vector extension");
+ return &ValMappings[FPExt64To128Idx];
+}
} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 06005f6b6886..0b10246b0cc8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -53,7 +53,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize = MF.getFunction()->optForSize();
+ ForCodeSize = MF.getFunction().optForSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9c57926da5f5..1242cf5be188 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -42,6 +42,8 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetCallingConv.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -70,8 +72,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetCallingConv.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -166,6 +166,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -173,14 +174,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -317,119 +321,118 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
-
- // f16 is a storage-only type, always promote it to f32.
- setOperationAction(ISD::SETCC, MVT::f16, Promote);
- setOperationAction(ISD::BR_CC, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
- setOperationAction(ISD::SELECT, MVT::f16, Promote);
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FMA, MVT::f16, Promote);
- setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FABS, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSQRT, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
-
- // v4f16 is also a storage-only type, so promote it to v4f32 when that is
- // known to be safe.
- setOperationAction(ISD::FADD, MVT::v4f16, Promote);
- setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
- setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
- setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
- setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
- AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
- AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
-
- // Expand all other v4f16 operations.
- // FIXME: We could generate better code by promoting some operations to
- // a pair of v4f32s
- setOperationAction(ISD::FABS, MVT::v4f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
- setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
- setOperationAction(ISD::FMA, MVT::v4f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
- setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
- setOperationAction(ISD::FREM, MVT::v4f16, Expand);
- setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
- setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
- setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
- setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
- setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
- setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
-
-
- // v8f16 is also a storage-only type, so expand it.
- setOperationAction(ISD::FABS, MVT::v8f16, Expand);
- setOperationAction(ISD::FADD, MVT::v8f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
- setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
- setOperationAction(ISD::FMA, MVT::v8f16, Expand);
- setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
- setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
- setOperationAction(ISD::FREM, MVT::v8f16, Expand);
- setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
- setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
- setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
- setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
- setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
- setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
- setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+ if (Subtarget->hasFullFP16())
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
+ else
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v4f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
+
+ if (!Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+
+ // promote v4f16 to v4f32 when that is known to be safe.
+ setOperationAction(ISD::FADD, MVT::v4f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
+ AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
+
+ setOperationAction(ISD::FABS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
+
+ setOperationAction(ISD::FABS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
+ }
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
@@ -445,6 +448,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNAN, Ty, Legal);
}
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f16, Legal);
+ setOperationAction(ISD::FRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
+ setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
+ }
+
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
@@ -775,8 +791,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
- // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
- if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16)
+ // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
+ if (VT.isFloatingPoint() &&
+ (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
@@ -1414,16 +1431,20 @@ static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
- return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+ bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+ DEBUG(dbgs() << "Is imm " << C << " legal: " << (IsLegal ? "yes\n" : "no\n"));
+ return IsLegal;
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
- if (VT == MVT::f16) {
+ if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
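
A self-contained illustration of the isLegalArithImmed check instrumented above: an AArch64 arithmetic immediate is legal when it fits in 12 bits, or when its low 12 bits are clear and it fits in 24 bits (a 12-bit value shifted left by 12).

#include <cassert>
#include <cstdint>

static bool isLegalArithImmed(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

int main() {
  assert(isLegalArithImmed(0xFFF));     // fits in 12 bits
  assert(isLegalArithImmed(0xABC000));  // 12-bit value shifted left by 12
  assert(!isLegalArithImmed(0x1001));   // needs 13 bits
  assert(!isLegalArithImmed(0xABC001)); // low 12 bits not clear
  return 0;
}
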
@@ -1513,9 +1534,12 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
- if (LHS.getValueType() == MVT::f16) {
+ if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
@@ -1948,10 +1972,41 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
}
+// Returns true if the given Op is the overflow flag result of an overflow
+// intrinsic operation.
+static bool isOverflowIntrOpRes(SDValue Op) {
+ unsigned Opc = Op.getOpcode();
+ return (Op.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+}
+
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
+ SDLoc dl(Sel);
+ // If the operand is an overflow checking operation, invert the condition
+ // code and kill the Not operation. I.e., transform:
+ // (xor (overflow_op_bool, 1))
+ // -->
+ // (csel 1, 0, invert(cc), overflow_op_bool)
+ // ... which later gets transformed to just a cset instruction with an
+ // inverted condition code, rather than a cset + eor sequence.
+ if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
+ return SDValue();
+
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+ AArch64CC::CondCode CC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
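
A source-level example of the pattern the new isOverflowIntrOpRes/LowerXOR combine targets. The expected lowering is a sketch under the assumption that the overflow intrinsic and the xor stay adjacent; it is not verified compiler output.

#include <cstdint>

// !overflow becomes (xor (saddo a, b):1, 1) in the DAG; with the combine this
// should fold to a single cset using the inverted condition instead of
// cset + eor.
bool addDoesNotOverflow(int64_t A, int64_t B) {
  int64_t R;
  return !__builtin_add_overflow(A, B, &R);
}
// Sketch of the expected AArch64 output:
//   adds x8, x0, x1
//   cset w0, vc        // "vc" is the inverse of the overflow condition "vs"
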
@@ -1970,7 +2025,6 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
- SDLoc dl(Sel);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
@@ -2171,8 +2225,9 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- // f16 conversions are promoted to f32.
- if (Op.getOperand(0).getValueType() == MVT::f16) {
+ // f16 conversions are promoted to f32 when full fp16 is not supported.
+ if (Op.getOperand(0).getValueType() == MVT::f16 &&
+ !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
@@ -2227,8 +2282,9 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- // f16 conversions are promoted to f32.
- if (Op.getValueType() == MVT::f16) {
+ // f16 conversions are promoted to f32 when full fp16 is not supported.
+ if (Op.getValueType() == MVT::f16 &&
+ !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
@@ -2517,6 +2573,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
+ DEBUG(dbgs() << "Custom lowering: ");
+ DEBUG(Op.dump());
+
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
@@ -2640,7 +2699,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
- llvm_unreachable("Unsupported calling convention.");
+ report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
@@ -2672,7 +2731,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
+ bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2686,7 +2745,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
- Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
@@ -2876,7 +2935,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
+ bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
@@ -3028,15 +3087,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return false;
MachineFunction &MF = DAG.getMachineFunction();
- const Function *CallerF = MF.getFunction();
- CallingConv::ID CallerCC = CallerF->getCallingConv();
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
- for (Function::const_arg_iterator i = CallerF->arg_begin(),
- e = CallerF->arg_end();
+ for (Function::const_arg_iterator i = CallerF.arg_begin(),
+ e = CallerF.arg_end();
i != e; ++i)
if (i->hasByValAttr())
return false;
@@ -3187,7 +3246,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
- if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -3428,6 +3487,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
+ } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
@@ -3628,11 +3691,12 @@ SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
// (loadGOT sym)
template <class NodeTy>
-SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
- SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT);
+ SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
@@ -3640,29 +3704,30 @@ SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const {
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
-SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG)
- const {
+SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, Ty,
- getTargetNode(N, Ty, DAG, AArch64II::MO_G3),
- getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC),
- getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC),
- getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC));
+ AArch64ISD::WrapperLarge, DL, Ty,
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
-SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
- SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE);
+ SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
@@ -3671,6 +3736,9 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
+ const AArch64II::TOF TargetFlags =
+ (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
+ : AArch64II::MO_NO_FLAG);
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -3679,14 +3747,21 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
// This also catches the large code model case for Darwin.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
- return getGOT(GN, DAG);
+ return getGOT(GN, DAG, TargetFlags);
}
+ SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- return getAddrLarge(GN, DAG);
+ Result = getAddrLarge(GN, DAG, TargetFlags);
} else {
- return getAddr(GN, DAG);
+ Result = getAddr(GN, DAG, TargetFlags);
}
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(GN);
+ if (GV->hasDLLImportStorageClass())
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ return Result;
}
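
A conceptual example of what the added MO_DLLIMPORT handling means for a dllimport global on Windows targets: the address is first loaded from the import-table slot, then dereferenced. The assembly is a sketch, not verified output.

__declspec(dllimport) extern int ImportedVar;

int readImported() { return ImportedVar; }
// Sketch:
//   adrp x8, __imp_ImportedVar
//   ldr  x8, [x8, :lo12:__imp_ImportedVar]  // load the pointer slot
//   ldr  w0, [x8]                           // load the value itself
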
/// \brief Convert a TLS address reference into the correct sequence of loads
@@ -3720,7 +3795,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ assert(Subtarget->isTargetDarwin() &&
+ "This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3809,9 +3885,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
- if (DAG.getTarget().Options.EmulatedTLS)
- return LowerToTLSEmulatedModel(GA, DAG);
-
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
@@ -3897,6 +3970,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
@@ -3929,12 +4006,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
- unsigned Opc = LHS.getOpcode();
- if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
- "Unexpected condition code.");
+ if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
@@ -4017,7 +4090,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Cmp);
}
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
+ LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
@@ -4051,25 +4125,26 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
- EVT EltVT;
uint64_t EltMask;
SDValue VecVal1, VecVal2;
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- EltVT = MVT::i32;
- VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
- EltMask = 0x80000000ULL;
+ auto setVecVal = [&] (int Idx) {
if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
+ };
+
+ if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
+ VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
+ EltMask = 0x80000000ULL;
+ setVecVal(AArch64::ssub);
} else if (VT == MVT::f64 || VT == MVT::v2f64) {
- EltVT = MVT::i64;
VecVT = MVT::v2i64;
// We want to materialize a mask with the high bit set, but the AdvSIMD
@@ -4077,15 +4152,11 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
+ setVecVal(AArch64::dsub);
+ } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
+ VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
+ EltMask = 0x8000ULL;
+ setVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
@@ -4103,6 +4174,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue Sel =
DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
+ if (VT == MVT::f16)
+ return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
else if (VT == MVT::f64)
@@ -4112,7 +4185,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
}
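
Bit-level illustration of the half-precision FCOPYSIGN support added above: the 0x8000 element mask selects only the sign bit of the second operand. The constants are standard IEEE half encodings; the helper is illustrative.

#include <cstdint>
#include <cstdio>

static uint16_t copySignF16Bits(uint16_t Mag, uint16_t Sgn) {
  const uint16_t SignMask = 0x8000; // matches EltMask for v4i16/v8i16 above
  return static_cast<uint16_t>((Mag & ~SignMask) | (Sgn & SignMask));
}

int main() {
  // 0x3C00 is +1.0 in IEEE half precision, 0xC000 is -2.0.
  std::printf("0x%04X\n", copySignF16Bits(0x3C00, 0xC000)); // 0xBC00 == -1.0
  return 0;
}
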
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
@@ -4185,7 +4258,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
// Now we know we're dealing with FP values.
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
+ LHS.getValueType() == MVT::f64);
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
@@ -4235,7 +4309,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
}
// Also handle f16, for which we need to do a f32 comparison.
- if (LHS.getValueType() == MVT::f16) {
+ if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
@@ -4356,13 +4430,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
+ LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
@@ -4423,12 +4497,9 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
- unsigned Opc = CCVal.getOpcode();
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
- if (CCVal.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ if (isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
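A hedged source-level illustration of the pattern isOverflowIntrOpRes is used to catch here: the overflow flag of a checked arithmetic operation feeding a select. The builtin below is the GCC/Clang spelling of a sadd.with.overflow-style op; the exact IR it produces is an assumption, not something stated in this patch.

#include <climits>

// The boolean overflow result drives the select, which is the XALUO shape
// the lowering above wants to recognize.
int clampedAdd(int a, int b) {
  int r;
  bool ov = __builtin_sadd_overflow(a, b, &r);
  return ov ? INT_MAX : r; // select driven by the overflow bit
}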
@@ -4597,7 +4668,7 @@ SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+ if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
@@ -4849,20 +4920,47 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
- // The AArch64 target doesn't support folding offsets into global addresses.
+ DEBUG(dbgs() << "Skipping offset folding global address: ");
+ DEBUG(GA->dump());
+ DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
+ "addresses\n");
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
// FIXME: We should be able to handle f128 as well with a clever lowering.
- if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
+ if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
+ DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
+ return true;
+ }
+
+ StringRef FPType;
+ bool IsLegal = false;
+ SmallString<128> ImmStrVal;
+ Imm.toString(ImmStrVal);
+
+ if (VT == MVT::f64) {
+ FPType = "f64";
+ IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
+ } else if (VT == MVT::f32) {
+ FPType = "f32";
+ IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
+ } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
+ FPType = "f16";
+ IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
+ }
+
+ if (IsLegal) {
+ DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
return true;
+ }
+
+ if (!FPType.empty())
+ DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
+ else
+ DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
- if (VT == MVT::f64)
- return AArch64_AM::getFP64Imm(Imm) != -1;
- else if (VT == MVT::f32)
- return AArch64_AM::getFP32Imm(Imm) != -1;
return false;
}
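The values that getFP32Imm/getFP64Imm (and getFP16Imm with full FP16) accept are the 8-bit FMOV immediates. As a hedged, brute-force sketch of that range, assuming the usual ±(n/16)·2^e form with n in [16, 31] and e in [-3, 4]:

#include <cmath>
#include <cstdio>

// Returns true if v is expressible as an 8-bit FMOV immediate under the
// assumed +/- (n/16) * 2^e rule; the encoders above return -1 otherwise.
static bool isFMOVImmediate(double v) {
  if (v < 0)
    v = -v;
  for (int e = -3; e <= 4; ++e)
    for (int n = 16; n <= 31; ++n)
      if (v == std::ldexp(static_cast<double>(n) / 16.0, e))
        return true;
  return false;
}

int main() {
  std::printf("%d %d %d\n", isFMOVImmediate(1.0),  // 1: 16/16 * 2^0
                            isFMOVImmediate(0.5),  // 1: 16/16 * 2^-1
                            isFMOVImmediate(0.1)); // 0: not representable
}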
@@ -4884,7 +4982,7 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
- ExtraSteps = VT == MVT::f64 ? 3 : 2;
+ ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
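A small standalone illustration of why two and three refinement steps suffice: the hardware estimate starts with about 8 correct bits, and each Newton-Raphson step (x' = x·(2 − d·x) for the reciprocal) roughly doubles them, so 8 → 16 → 32 covers float's 24 significant bits and one more step covers double's 53. The names below are illustrative, not the ISA's or LLVM's.

#include <cstdio>

// One generic Newton-Raphson refinement step for 1/d.
static double refineRecip(double d, double x) { return x * (2.0 - d * x); }

int main() {
  double d = 3.0, x = 0.3; // crude initial estimate of 1/3
  for (int i = 0; i < 3; ++i) {
    x = refineRecip(d, x);
    std::printf("step %d: %.15f\n", i + 1, x); // converges quadratically
  }
}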
@@ -5301,6 +5399,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -5336,8 +5435,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1))) {
- // A shuffle can only come from building a vector from various
- // elements of other vectors, provided their indices are constant.
+ DEBUG(dbgs() << "Reshuffle failed: "
+ "a shuffle can only come from building a vector from "
+ "various elements of other vectors, provided their "
+ "indices are constant\n");
return SDValue();
}
@@ -5353,10 +5454,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
- // Currently only do something sane when at most two source vectors
- // are involved.
- if (Sources.size() > 2)
+ if (Sources.size() > 2) {
+ DEBUG(dbgs() << "Reshuffle failed: currently only do something sane when at "
+ "most two source vectors are involved\n");
return SDValue();
+ }
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
@@ -5400,7 +5502,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
- // Span too large for a VEXT to cope
+ DEBUG(dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
@@ -5481,8 +5583,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
}
// Final check before we try to produce nonsense...
- if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
+ DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
+ }
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
@@ -5490,7 +5594,16 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+
+ DEBUG(
+ dbgs() << "Reshuffle, creating node: ";
+ Shuffle.dump();
+ dbgs() << "Reshuffle, creating node: ";
+ V.dump();
+ );
+
+ return V;
}
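As a rough source-level picture (using GCC/Clang vector extensions purely for illustration, not LLVM API), this is the kind of element-by-element construction from constant-index extracts of at most two sources that ReconstructShuffle folds back into a single vector shuffle:

typedef float v4f __attribute__((vector_size(16)));

// Every lane is a constant-index extract from one of two sources, so the
// whole build reduces to one shuffle of a and b.
v4f interleave_low(v4f a, v4f b) {
  v4f r = {a[0], b[0], a[1], b[1]};
  return r;
}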
// check if an EXT instruction can handle the shuffle mask when the
@@ -6703,27 +6816,36 @@ FailedModImm:
usesOnlyOneValue = false;
}
- if (!Value.getNode())
+ if (!Value.getNode()) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
+ }
- if (isOnlyLowElement)
+ if (isOnlyLowElement) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
+ "SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+ }
- // Use DUP for non-constant splats. For f32 constant splats, reduce to
+ // Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- Value.getValueType() != VT)
+ Value.getValueType() != VT) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
+ }
// This is actually a DUPLANExx operation, which keeps everything vectory.
- // DUPLANE works on 128-bit vectors, widen it if necessary.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
- if (Value.getValueSizeInBits() == 64)
+ if (Value.getValueSizeInBits() == 64) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
+ "widening it\n");
Value = WidenVector(Value, DAG);
+ }
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
@@ -6734,11 +6856,17 @@ FailedModImm:
EVT EltTy = VT.getVectorElementType();
assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
+ "BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
+ DEBUG(
+ dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
+ Val.dump();
+ );
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -6764,11 +6892,12 @@ FailedModImm:
return Val;
}
- // If all elements are constants and the case above didn't get hit, fall back
- // to the default expansion, which will generate a load from the constant
- // pool.
- if (isConstant)
+ // This will generate a load from the constant pool.
+ if (isConstant) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
+ "expansion\n");
return SDValue();
+ }
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
@@ -6783,6 +6912,9 @@ FailedModImm:
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
+ "of INSERT_VECTOR_ELT\n");
+
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
@@ -6798,9 +6930,14 @@ FailedModImm:
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
+ DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
+ DEBUG(
+ if (i < NumElts)
+ dbgs() << "Creating nodes for the other vector elements:\n";
+ );
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
@@ -6811,7 +6948,8 @@ FailedModImm:
return Vec;
}
- // Just use the default expansion. We failed to find a better alternative.
+ DEBUG(dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
+ "better alternative\n");
return SDValue();
}
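And a sketch of the non-constant splat case mentioned above, the input shape that maps to a single DUP rather than a constant-pool load (again written with the GCC/Clang vector extension only as an illustration):

typedef int v4i __attribute__((vector_size(16)));

// Every lane takes the same runtime value: a non-constant splat.
v4i splat4(int x) {
  v4i r = {x, x, x, x};
  return r;
}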
@@ -6912,8 +7050,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
return SDValue();
}
-bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
- EVT VT) const {
+bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
@@ -7234,6 +7371,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
/// specified in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
+ MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
@@ -7256,9 +7394,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align = 0;
- Info.vol = false; // volatile loads with NEON intrinsics not supported
- Info.readMem = true;
- Info.writeMem = false;
+ // volatile loads with NEON intrinsics not supported
+ Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
@@ -7283,9 +7420,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align = 0;
- Info.vol = false; // volatile stores with NEON intrinsics not supported
- Info.readMem = false;
- Info.writeMem = true;
+ // volatile stores with NEON intrinsics not supported
+ Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
@@ -7296,9 +7432,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
@@ -7309,9 +7443,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
@@ -7321,9 +7453,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 16;
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
@@ -7332,9 +7462,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = 16;
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
default:
break;
@@ -7422,7 +7550,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
- // Vector types are next free.
+ // Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
@@ -7781,9 +7909,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
// instruction to materialize the v2i64 zero and one store (with restrictive
// addressing mode). Just do two i64 store of zero-registers.
bool Fast;
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
+ !F.hasFnAttribute(Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
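A hedged example of the memset shape this hook is tuning: a 16-byte clear can use one 128-bit (q-register) store when FP/SIMD is allowed, and falls back to two 64-bit zero-register stores under the noimplicitfloat attribute, as the comment above describes.

#include <cstring>

// Sketch only: the 16-byte zeroing case getOptimalMemOpType decides on.
void clear16(void *p) { std::memset(p, 0, 16); }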
@@ -7803,12 +7931,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
// 12-bit optionally shifted immediates are legal for adds.
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
- // Avoid UB for INT64_MIN.
- if (Immed == std::numeric_limits<int64_t>::min())
+ if (Immed == std::numeric_limits<int64_t>::min()) {
+ DEBUG(dbgs() << "Illegal add imm " << Immed << ": avoid UB for INT64_MIN\n");
return false;
+ }
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
- return ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
+ bool IsLegal = ((Immed >> 12) == 0 ||
+ ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
+ DEBUG(dbgs() << "Is " << Immed << " legal add imm: " <<
+ (IsLegal ? "yes" : "no") << "\n");
+ return IsLegal;
}
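A standalone restatement of the check above: ADD/SUB immediates are 12 bits, optionally shifted left by 12, and add/sub share an encoding so only the magnitude matters; INT64_MIN is rejected first because negating it would overflow.

#include <cstdint>
#include <cstdio>

// Same rule as the lowering: value fits in 12 bits, or in 12 bits shifted
// left by 12, after taking the absolute value.
static bool isLegalAddImm(int64_t imm) {
  if (imm == INT64_MIN)
    return false;
  uint64_t v =
      imm < 0 ? -static_cast<uint64_t>(imm) : static_cast<uint64_t>(imm);
  return (v >> 12) == 0 || ((v & 0xfff) == 0 && (v >> 24) == 0);
}

int main() {
  std::printf("%d %d %d\n", isLegalAddImm(4095),  // 1: fits in 12 bits
                            isLegalAddImm(4096),  // 1: 1 << 12
                            isLegalAddImm(4097)); // 0: needs both halves
}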
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
@@ -7821,7 +7954,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
@@ -8023,7 +8156,7 @@ SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
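A sketch of the usual expansion BuildSDIVPow2 produces for signed division by 2^k when the cheap-div path above is not taken: add a bias of (2^k − 1) for negative inputs, then arithmetic-shift right. This assumes the common arithmetic behaviour of >> on negative ints, which holds on the targets in question.

#include <cstdio>

// Round-toward-zero signed division by 2^k via bias + arithmetic shift.
static int sdivPow2(int x, int k) {
  int bias = (x >> 31) & ((1 << k) - 1); // k ones if x < 0, else 0
  return (x + bias) >> k;
}

int main() {
  std::printf("%d %d\n", sdivPow2(-7, 2), -7 / 4); // both print -1
}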
@@ -9420,8 +9553,6 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalize())
- return SDValue();
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
@@ -9446,7 +9577,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
// Don't split at -Oz.
- if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
@@ -10267,6 +10398,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
+ DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
@@ -10740,7 +10872,7 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
- // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
@@ -10755,7 +10887,7 @@ Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) cons
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
- // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
@@ -10772,7 +10904,7 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
- return Mask->getUniqueInteger().isPowerOf2();
+ return Mask->getValue().isPowerOf2();
}
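A hedged example of why a power-of-two mask makes the and-plus-compare-with-zero fold beneficial: a single-bit test like the one below can typically be selected as a test-bit-and-branch (TBZ/TBNZ) when the AND stays next to the compare.

// Mask is a power of two, so this is a single-bit test.
int branchOnBit3(unsigned x) { return (x & 8u) == 0 ? 1 : 2; }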
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
@@ -10807,7 +10939,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 3b0e0f1de894..8d78b5b6b5b4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -18,9 +18,9 @@
#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -290,7 +290,7 @@ public:
/// Return true if the given shuffle mask can be codegen'd directly, or if it
/// should be stack expanded.
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
+ bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
/// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -306,6 +306,7 @@ public:
MachineBasicBlock *MBB) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
unsigned Intrinsic) const override;
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
@@ -338,7 +339,8 @@ public:
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
@@ -413,7 +415,7 @@ public:
// Do not merge to float value size (128 bytes) if no implicit
// float attribute is set.
- bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (NoFloat)
@@ -442,8 +444,8 @@ public:
}
bool supportSplitCSR(MachineFunction *MF) const override {
- return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
- MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
}
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
void insertCopiesSplitCSR(
@@ -470,6 +472,9 @@ public:
MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+ CallingConv::ID CallConv,
+ bool isVarArg) const override;
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
@@ -534,10 +539,12 @@ private:
unsigned Flag) const;
SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
- template <class NodeTy> SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const;
template <class NodeTy>
- SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const;
- template <class NodeTy> SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const;
+ SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
+ template <class NodeTy>
+ SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
+ template <class NodeTy>
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -639,10 +646,6 @@ private:
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
- CallingConv::ID CallConv,
- bool isVarArg) const override;
-
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
};
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index eec41ddbc159..153bcf75cbcd 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -30,18 +30,18 @@ def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return isAcquireOrStronger(Ordering);
-}]>;
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 1;
+}
// An atomic load operation that does not need either acquire or release
// semantics.
class relaxed_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return !isAcquireOrStronger(Ordering);
-}]>;
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 0;
+}
// 8-bit loads
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
@@ -113,19 +113,17 @@ def : Pat<(relaxed_load<atomic_load_64>
// A store operation that actually needs release semantics.
class releasing_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(Ordering != AtomicOrdering::AcquireRelease &&
- "unexpected store ordering");
- return isReleaseOrStronger(Ordering);
-}]>;
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = 1;
+}
// An atomic store operation that doesn't actually need to be atomic on AArch64.
class relaxed_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return !isReleaseOrStronger(Ordering);
-}]>;
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = 0;
+}
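As a hedged mapping sketch at the C++ level, assuming the usual correspondence between memory orders and these pattern classes (the per-width patterns below then pick LDARB/LDRB, STLRB/STRB, and so on):

#include <atomic>

int loadAcquire(const std::atomic<int> &a) {
  return a.load(std::memory_order_acquire); // acquiring_load -> LDAR form
}
int loadRelaxed(const std::atomic<int> &a) {
  return a.load(std::memory_order_relaxed); // relaxed_load -> plain LDR form
}
void storeRelease(std::atomic<int> &a, int v) {
  a.store(v, std::memory_order_release);    // releasing_store -> STLR form
}
void storeRelaxed(std::atomic<int> &a, int v) {
  a.store(v, std::memory_order_relaxed);    // relaxed_store -> plain STR form
}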
// 8-bit stores
def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
@@ -407,57 +405,17 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
Sched<[WriteAtomic]>;
// v8.1 Atomic instructions:
-def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
-def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
-def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
-
-def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALb (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALh (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALs (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
-def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALd (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
+let Predicates = [HasLSE] in {
+ defm : LDOPregister_patterns<"LDADD", "atomic_load_add">;
+ defm : LDOPregister_patterns<"LDSET", "atomic_load_or">;
+ defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">;
+ defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">;
+ defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">;
+ defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">;
+ defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">;
+ defm : LDOPregister_patterns<"SWP", "atomic_swap">;
+ defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
+ defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
+ defm : CASregister_patterns<"CAS", "atomic_cmp_swap">;
+}
+
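For reference, a hedged C++-level view of the read-modify-write operations these LSE patterns cover; with the HasLSE predicate satisfied they can select the single-instruction atomics (LDADD, SWP, CAS and friends, with the suffix chosen by memory order) instead of load/store-exclusive loops, and subtraction is handled as an add of the negated operand as the patterns above show.

#include <atomic>

int fetchAdd(std::atomic<int> &a, int v) { return a.fetch_add(v); } // LDADD*
int swapValue(std::atomic<int> &a, int v) { return a.exchange(v); } // SWP*
bool casValue(std::atomic<int> &a, int expected, int desired) {
  return a.compare_exchange_strong(expected, desired);              // CAS*
}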
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index c44daf306ea9..80c5092a4eed 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -189,6 +189,11 @@ def GPR32as64 : RegisterOperand<GPR32> {
// are encoded as the eight bit value 'abcdefgh'.
def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
+// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets.
+def SImm10s8Operand : AsmOperandClass {
+ let Name = "SImm10s8";
+ let DiagnosticType = "InvalidMemoryIndexedSImm10";
+}
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -216,6 +221,12 @@ def adrlabel : Operand<i64> {
let ParserMatchClass = AdrOperand;
}
+def simm10Scaled : Operand<i64> {
+ let ParserMatchClass = SImm10s8Operand;
+ let DecoderMethod = "DecodeSImm<10>";
+ let PrintMethod = "printImmScale<8>";
+}
+
// simm9 predicate - True if the immediate is in the range [-256, 255].
def SImm9Operand : AsmOperandClass {
let Name = "SImm9";
@@ -489,14 +500,14 @@ let DiagnosticType = "LogicalSecondSource" in {
let Name = "LogicalImm64Not";
}
}
-def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
- return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32);
+def logical_imm32 : Operand<i32>, IntImmLeaf<i32, [{
+ return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 32);
}], logical_imm32_XFORM> {
let PrintMethod = "printLogicalImm32";
let ParserMatchClass = LogicalImm32Operand;
}
-def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
- return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64);
+def logical_imm64 : Operand<i64>, IntImmLeaf<i64, [{
+ return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 64);
}], logical_imm64_XFORM> {
let PrintMethod = "printLogicalImm64";
let ParserMatchClass = LogicalImm64Operand;
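A self-contained, hedged sketch of the rule isLogicalImmediate enforces for these operands: the value must be a rotated run of ones replicated across the register in some power-of-two element size from 2 up to the register width, with all-zeros and all-ones excluded.

#include <cstdint>
#include <cstdio>

// A rotated run of ones has either its ones or its zeros contiguous.
static bool contiguousOnes(uint64_t x) {
  return x != 0 && (((x | (x - 1)) + 1) & x) == 0;
}

static bool isLogicalImm(uint64_t val, unsigned regSize) {
  uint64_t regMask = (regSize == 64) ? ~0ULL : ((1ULL << regSize) - 1);
  val &= regMask;
  if (val == 0 || val == regMask)
    return false; // all-zeros and all-ones are not encodable
  for (unsigned size = 2; size <= regSize; size *= 2) {
    uint64_t eltMask = (size == 64) ? ~0ULL : ((1ULL << size) - 1);
    uint64_t elt = val & eltMask;
    bool repeats = true;
    for (unsigned i = size; i < regSize; i += size)
      if (((val >> i) & eltMask) != elt)
        repeats = false;
    if (repeats && (contiguousOnes(elt) || contiguousOnes(~elt & eltMask)))
      return true;
  }
  return false;
}

int main() {
  std::printf("%d %d %d\n",
              isLogicalImm(0x00ff00ff00ff00ffULL, 64),  // 1: repeated 0x00ff
              isLogicalImm(0x0000000000000ff0ULL, 64),  // 1: rotated run
              isLogicalImm(0x0000000000001234ULL, 64)); // 0: not a run
}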
@@ -743,8 +754,8 @@ class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
// Floating-point immediate.
def fpimm16 : Operand<f16>,
- PatLeaf<(f16 fpimm), [{
- return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1;
+ FPImmLeaf<f16, [{
+ return AArch64_AM::getFP16Imm(Imm) != -1;
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP16Imm(InVal);
@@ -754,8 +765,8 @@ def fpimm16 : Operand<f16>,
let PrintMethod = "printFPImmOperand";
}
def fpimm32 : Operand<f32>,
- PatLeaf<(f32 fpimm), [{
- return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1;
+ FPImmLeaf<f32, [{
+ return AArch64_AM::getFP32Imm(Imm) != -1;
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP32Imm(InVal);
@@ -765,8 +776,8 @@ def fpimm32 : Operand<f32>,
let PrintMethod = "printFPImmOperand";
}
def fpimm64 : Operand<f64>,
- PatLeaf<(f64 fpimm), [{
- return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1;
+ FPImmLeaf<f64, [{
+ return AArch64_AM::getFP64Imm(Imm) != -1;
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP64Imm(InVal);
@@ -781,8 +792,8 @@ def fpimm8 : Operand<i32> {
let PrintMethod = "printFPImmOperand";
}
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
+def fpimm0 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+0.0);
}]>;
// Vector lane operands
@@ -836,10 +847,9 @@ def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
// are encoded as the eight bit value 'abcdefgh'.
def simdimmtype10 : Operand<i32>,
- PatLeaf<(f64 fpimm), [{
- return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF()
- .bitcastToAPInt()
- .getZExtValue());
+ FPImmLeaf<f64, [{
+ return AArch64_AM::isAdvSIMDModImmType10(
+ Imm.bitcastToAPInt().getZExtValue());
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
@@ -913,6 +923,17 @@ class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
let Inst{7-5} = opc;
}
+class SystemNoOperands<bits<3> op2, string asm, list<dag> pattern = []>
+ : SimpleSystemI<0, (ins), asm, "", pattern>,
+ Sched<[]> {
+ bits<4> CRm;
+ let CRm = 0b0011;
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = op2;
+ let Inst{4-0} = 0b11111;
+}
+
// MRS/MSR system instructions. These have different operand classes because
// a different subset of registers can be accessed through each instruction.
def MRSSystemRegisterOperand : AsmOperandClass {
@@ -1098,6 +1119,83 @@ class SpecialReturn<bits<4> opc, string asm>
let Inst{9-5} = 0b11111;
}
+let mayLoad = 1 in
+class RCPCLoad<bits<2> sz, string asm, RegisterClass RC>
+ : I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>,
+ Sched<[]> {
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = sz;
+ let Inst{29-10} = 0b11100010111111110000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+class AuthBase<bits<1> M, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern>
+ : I<oops, iops, asm, operands, "", pattern>, Sched<[]> {
+ let Inst{31-25} = 0b1101011;
+ let Inst{20-11} = 0b1111100001;
+ let Inst{10} = M;
+ let Inst{4-0} = 0b11111;
+}
+
+class AuthBranchTwoOperands<bits<1> op, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins GPR64:$Rn, GPR64sp:$Rm), asm, "\t$Rn, $Rm", []> {
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{24-22} = 0b100;
+ let Inst{21} = op;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rm;
+}
+
+class AuthOneOperand<bits<3> opc, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins GPR64:$Rn), asm, "\t$Rn", []> {
+ bits<5> Rn;
+ let Inst{24} = 0;
+ let Inst{23-21} = opc;
+ let Inst{9-5} = Rn;
+}
+
+class AuthReturn<bits<3> op, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins), asm, "", []> {
+ let Inst{24} = 0;
+ let Inst{23-21} = op;
+ let Inst{9-0} = 0b1111111111;
+}
+
+let mayLoad = 1 in
+class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
+ string operands, string cstr, Operand opr>
+ : I<oops, iops, asm, operands, cstr, []>, Sched<[]> {
+ bits<10> offset;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-24} = 0b11111000;
+ let Inst{23} = M;
+ let Inst{22} = offset{9};
+ let Inst{21} = 1;
+ let Inst{20-12} = offset{8-0};
+ let Inst{11} = W;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass AuthLoad<bit M, string asm, Operand opr> {
+ def indexed : BaseAuthLoad<M, 0, (outs GPR64:$Rt),
+ (ins GPR64sp:$Rn, opr:$offset),
+ asm, "\t$Rt, [$Rn, $offset]", "", opr>;
+ def writeback : BaseAuthLoad<M, 1, (outs GPR64sp:$wback, GPR64:$Rt),
+ (ins GPR64sp:$Rn, opr:$offset),
+ asm, "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $wback,@earlyclobber $wback", opr>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>;
+}
+
//---
// Conditional branch instruction.
//---
@@ -1320,6 +1418,46 @@ class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
let Inst{31} = 1;
}
+class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
+ : I<(outs GPR64:$Rd), (ins GPR64sp:$Rn), asm, "\t$Rd, $Rn", "",
+ []>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-15} = 0b11011010110000010;
+ let Inst{14-12} = opcode_prefix;
+ let Inst{11-10} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm>
+ : I<(outs GPR64:$Rd), (ins), asm, "\t$Rd", "", []>, Sched<[]> {
+ bits<5> Rd;
+ let Inst{31-15} = 0b11011010110000010;
+ let Inst{14-12} = opcode_prefix;
+ let Inst{11-10} = opcode;
+ let Inst{9-5} = 0b11111;
+ let Inst{4-0} = Rd;
+}
+
+class SignAuthTwoOperand<bits<4> opc, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64sp:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64sp:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-21} = 0b10011010110;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
//---
// Basic two-operand data processing instructions.
//---
@@ -2378,6 +2516,22 @@ def am_indexed32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []>;
def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>;
def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>;
+def gi_am_indexed8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<8>">,
+ GIComplexPatternEquiv<am_indexed8>;
+def gi_am_indexed16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<16>">,
+ GIComplexPatternEquiv<am_indexed16>;
+def gi_am_indexed32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<32>">,
+ GIComplexPatternEquiv<am_indexed32>;
+def gi_am_indexed64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<64>">,
+ GIComplexPatternEquiv<am_indexed64>;
+def gi_am_indexed128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<128>">,
+ GIComplexPatternEquiv<am_indexed128>;
+
class UImm12OffsetOperand<int Scale> : AsmOperandClass {
let Name = "UImm12Offset" # Scale;
let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">";
@@ -2449,6 +2603,23 @@ multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
(!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
+// Same as StoreUI, but take a RegisterOperand. This is used by GlobalISel to
+// substitute zero-registers automatically.
+//
+// TODO: Roll out zero-register substitution to GPR32/GPR64 and fold this back
+// into StoreUI.
+multiclass StoreUIz<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
def PrefetchOperand : AsmOperandClass {
let Name = "Prefetch";
let ParserMethod = "tryParsePrefetch";
@@ -2933,22 +3104,18 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
- let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
- [(storeop (Ty regtype:$Rt),
- (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend128:$extend))]>,
+ []>,
Sched<[WriteSTIdx, ReadAdrBase]> {
let Inst{13} = 0b0;
}
- let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
- [(storeop (Ty regtype:$Rt),
- (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend128:$extend))]>,
+ []>,
Sched<[WriteSTIdx, ReadAdrBase]> {
let Inst{13} = 0b1;
}
@@ -3012,6 +3179,23 @@ def am_unscaled32 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>;
def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
def am_unscaled128 :ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
+def gi_am_unscaled8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled8">,
+ GIComplexPatternEquiv<am_unscaled8>;
+def gi_am_unscaled16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled16">,
+ GIComplexPatternEquiv<am_unscaled16>;
+def gi_am_unscaled32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled32">,
+ GIComplexPatternEquiv<am_unscaled32>;
+def gi_am_unscaled64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled64">,
+ GIComplexPatternEquiv<am_unscaled64>;
+def gi_am_unscaled128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled128">,
+ GIComplexPatternEquiv<am_unscaled128>;
+
+
class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
string asm, list<dag> pattern>
: I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
@@ -4374,6 +4558,12 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+ string kind2> :
+ BaseSIMDThreeSameVector<Q, U, 0b100, 0b10010, V128, asm, kind1, [] > {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -6801,6 +6991,16 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
+// ARMv8.2 Index Dot product instructions
+class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
+ string lhs_kind, string rhs_kind> :
+ BaseSIMDIndexedTied<Q, U, 0b0, 0b10, 0b1110, V128, V128, V128, VectorIndexS,
+ asm, "", dst_kind, lhs_kind, rhs_kind, []> {
+ bits<2> idx;
+ let Inst{21} = idx{0}; // L
+ let Inst{11} = idx{1}; // H
+}
+
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -9241,6 +9441,238 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
} // let Predicates = [HasNeon, HasRDM]
//----------------------------------------------------------------------------
+// ARMv8.3 Complex ADD/MLA instructions
+//----------------------------------------------------------------------------
+
+class ComplexRotationOperand<int Angle, int Remainder, string Type>
+ : AsmOperandClass {
+ let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
+ let DiagnosticType = "InvalidComplexRotation" # Type;
+ let Name = "ComplexRotation" # Type;
+}
+def complexrotateop : Operand<i32> {
+ let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
+ let PrintMethod = "printComplexRotationOp<90, 0>";
+}
+def complexrotateopodd : Operand<i32> {
+ let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
+ let PrintMethod = "printComplexRotationOp<180, 90>";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
+ RegisterOperand regtype, Operand rottype,
+ string asm, string kind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+ "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<1> rot;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = opcode;
+ // Non-tied version (FCADD) only has one rotation bit
+ let Inst{12} = rot;
+ let Inst{11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
+ string asm, SDPatternOperator OpNode>{
+ let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
+ asm, ".4h",
+ [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
+ (v4f16 V64:$Rn),
+ (v4f16 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
+ asm, ".8h",
+ [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+ (v8f16 V128:$Rn),
+ (v8f16 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+
+ let Predicates = [HasV8_3a, HasNEON] in {
+ def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
+ asm, ".2s",
+ [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
+ (v2f32 V64:$Rn),
+ (v2f32 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
+ asm, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v4f32 V128:$Rn),
+ (v4f32 V128:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
+ asm, ".2d",
+ [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
+ (v2f64 V128:$Rn),
+ (v2f64 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
+ bits<3> opcode,
+ RegisterOperand regtype,
+ Operand rottype, string asm,
+ string kind, list<dag> pattern>
+ : I<(outs regtype:$dst),
+ (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+ "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> rot;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = opcode;
+ let Inst{12-11} = rot;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
+ Operand rottype, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
+ rottype, asm, ".4h",
+ [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
+ (v4f16 V64:$Rn),
+ (v4f16 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
+ rottype, asm, ".8h",
+ [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+ (v8f16 V128:$Rn),
+ (v8f16 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+
+ let Predicates = [HasV8_3a, HasNEON] in {
+ def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
+ rottype, asm, ".2s",
+ [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
+ (v2f32 V64:$Rn),
+ (v2f32 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
+ rottype, asm, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v4f32 V128:$Rn),
+ (v4f32 V128:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
+ rottype, asm, ".2d",
+ [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
+ (v2f64 V128:$Rn),
+ (v2f64 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
+ bit opc1, bit opc2, RegisterOperand dst_reg,
+ RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx,
+ Operand rottype, string asm, string apple_kind,
+ string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$dst),
+ (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot),
+ asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
+ "$idx, $rot" # "|" # apple_kind #
+ "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> rot;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15} = opc1;
+ let Inst{14-13} = rot;
+ let Inst{12} = opc2;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// The complex instructions index by pairs of elements, so the VectorIndexes
+// don't match the lane types, and the index bits are different to the other
+// classes.
+multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
+ string asm, SDPatternOperator OpNode> {
+ let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+ def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
+ V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
+ ".4h", ".h", []> {
+ bits<1> idx;
+ let Inst{11} = 0;
+ let Inst{21} = idx{0};
+ }
+
+ def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2,
+ V128, V128, V128, VectorIndexS, rottype, asm, ".8h",
+ ".8h", ".8h", ".h", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+
+ let Predicates = [HasV8_3a,HasNEON] in {
+ def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
+ V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
+ ".4s", ".4s", ".s", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+ } // Predicates = [HasV8_3a,HasNEON]
+}
+
+//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
@@ -9398,10 +9830,10 @@ class BaseCAS<string order, string size, RegisterClass RC>
}
multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS<order, "b", GPR32>;
- let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS<order, "h", GPR32>;
- let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS<order, "", GPR32>;
- let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS<order, "", GPR64>;
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS<order, "", GPR64>;
}
class BaseCASP<string order, string size, RegisterOperand RC>
@@ -9413,10 +9845,10 @@ class BaseCASP<string order, string size, RegisterOperand RC>
}
multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel in
- def s : BaseCASP<order, "", WSeqPairClassOperand>;
- let Sz = 0b01, Acq = Acq, Rel = Rel in
- def d : BaseCASP<order, "", XSeqPairClassOperand>;
+ let Sz = 0b00, Acq = Acq, Rel = Rel in
+ def W : BaseCASP<order, "", WSeqPairClassOperand>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in
+ def X : BaseCASP<order, "", XSeqPairClassOperand>;
}
let Predicates = [HasLSE] in
@@ -9446,10 +9878,10 @@ class BaseSWP<string order, string size, RegisterClass RC>
}
multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP<order, "b", GPR32>;
- let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseSWP<order, "h", GPR32>;
- let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP<order, "", GPR32>;
- let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP<order, "", GPR64>;
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseSWP<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>;
}
let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
@@ -9480,14 +9912,94 @@ class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
string order> {
- let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
- def b : BaseLDOPregister<op, order, "b", GPR32>;
- let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
- def h : BaseLDOPregister<op, order, "h", GPR32>;
- let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in
- def s : BaseLDOPregister<op, order, "", GPR32>;
- let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in
- def d : BaseLDOPregister<op, order, "", GPR64>;
+ let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
+ def B : BaseLDOPregister<op, order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
+ def H : BaseLDOPregister<op, order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in
+ def W : BaseLDOPregister<op, order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in
+ def X : BaseLDOPregister<op, order, "", GPR64>;
+}
+
+// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more
+// complex DAG for DstRHS.
+let Predicates = [HasLSE] in
+multiclass LDOPregister_patterns_ord_dag<string inst, string suffix, string op,
+ string size, dag SrcRHS, dag DstRHS> {
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+}
+
+multiclass LDOPregister_patterns_ord<string inst, string suffix, string op,
+ string size, dag RHS> {
+ defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, RHS, RHS>;
+}
+
+multiclass LDOPregister_patterns_ord_mod<string inst, string suffix, string op,
+ string size, dag LHS, dag RHS> {
+ defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, LHS, RHS>;
+}
+
+multiclass LDOPregister_patterns<string inst, string op> {
+ defm : LDOPregister_patterns_ord<inst, "X", op, "64", (i64 GPR64:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "W", op, "32", (i32 GPR32:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "H", op, "16", (i32 GPR32:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "B", op, "8", (i32 GPR32:$Rm)>;
+}
+
+multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
+ defm : LDOPregister_patterns_ord_mod<inst, "X", op, "64",
+ (i64 GPR64:$Rm),
+ (i64 (!cast<Instruction>(mod#Xrr) XZR, GPR64:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "W", op, "32",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "H", op, "16",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "B", op, "8",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+}
+
+let Predicates = [HasLSE] in
+multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
+ string size, dag OLD, dag NEW> {
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+}
+
+multiclass CASregister_patterns_ord<string inst, string suffix, string op,
+ string size, dag OLD, dag NEW> {
+ defm : CASregister_patterns_ord_dag<inst, suffix, op, size, OLD, NEW>;
+}
+
+multiclass CASregister_patterns<string inst, string op> {
+ defm : CASregister_patterns_ord<inst, "X", op, "64",
+ (i64 GPR64:$Rold), (i64 GPR64:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "W", op, "32",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "H", op, "16",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "B", op, "8",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
}
let Predicates = [HasLSE] in
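The multiclasses above map each memory ordering onto an LSE instruction variant: no suffix for monotonic, "A" for acquire, "L" for release, and "AL" for both acq_rel and seq_cst. As an illustrative sketch only (assuming an AArch64 target with the LSE extension; the function name is hypothetical and this code is not part of the patch), these are the kinds of source-level operations the LDOP and CAS patterns select instructions for:

#include <atomic>
#include <cstdint>

// Each ordering below corresponds to one pattern flavor defined above,
// e.g. LDADD / LDADDA / LDADDL / LDADDAL for fetch_add, and the CAS
// patterns for compare_exchange.
uint64_t atomic_rmw(std::atomic<uint64_t> &v, uint64_t expected) {
  v.fetch_add(1, std::memory_order_relaxed);              // monotonic
  v.fetch_add(1, std::memory_order_acquire);              // acquire
  v.fetch_add(1, std::memory_order_release);              // release
  uint64_t r = v.fetch_add(1, std::memory_order_acq_rel); // acq_rel / seq_cst
  v.compare_exchange_strong(expected, r);                 // CAS patterns
  return r;
}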
@@ -9496,26 +10008,27 @@ class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
multiclass STOPregister<string asm, string instr> {
- def : BaseSTOPregister<asm # "lb", GPR32, WZR,
- !cast<Instruction>(instr # "Lb")>;
- def : BaseSTOPregister<asm # "lh", GPR32, WZR,
- !cast<Instruction>(instr # "Lh")>;
- def : BaseSTOPregister<asm # "l", GPR32, WZR,
- !cast<Instruction>(instr # "Ls")>;
- def : BaseSTOPregister<asm # "l", GPR64, XZR,
- !cast<Instruction>(instr # "Ld")>;
- def : BaseSTOPregister<asm # "b", GPR32, WZR,
- !cast<Instruction>(instr # "b")>;
- def : BaseSTOPregister<asm # "h", GPR32, WZR,
- !cast<Instruction>(instr # "h")>;
- def : BaseSTOPregister<asm, GPR32, WZR,
- !cast<Instruction>(instr # "s")>;
- def : BaseSTOPregister<asm, GPR64, XZR,
- !cast<Instruction>(instr # "d")>;
+ def : BaseSTOPregister<asm # "lb", GPR32, WZR,
+ !cast<Instruction>(instr # "LB")>;
+ def : BaseSTOPregister<asm # "lh", GPR32, WZR,
+ !cast<Instruction>(instr # "LH")>;
+ def : BaseSTOPregister<asm # "l", GPR32, WZR,
+ !cast<Instruction>(instr # "LW")>;
+ def : BaseSTOPregister<asm # "l", GPR64, XZR,
+ !cast<Instruction>(instr # "LX")>;
+ def : BaseSTOPregister<asm # "b", GPR32, WZR,
+ !cast<Instruction>(instr # "B")>;
+ def : BaseSTOPregister<asm # "h", GPR32, WZR,
+ !cast<Instruction>(instr # "H")>;
+ def : BaseSTOPregister<asm, GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregister<asm, GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
}
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
+def : TokenAlias<".4B", ".4b">; // Add dot product
def : TokenAlias<".8B", ".8b">;
def : TokenAlias<".4H", ".4h">;
def : TokenAlias<".2S", ".2s">;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 13c80a46e5b0..c7c560a81328 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,7 +28,10 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
@@ -40,8 +44,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -52,17 +54,17 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-static cl::opt<unsigned>
-TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
- cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+static cl::opt<unsigned> TBZDisplacementBits(
+ "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+ cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
-static cl::opt<unsigned>
-CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
- cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+static cl::opt<unsigned> CBZDisplacementBits(
+ "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
-BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
- cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
@@ -172,8 +174,8 @@ bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
return isIntN(Bits, BrOffset / 4);
}
-MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
- const MachineInstr &MI) const {
+MachineBasicBlock *
+AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
llvm_unreachable("unexpected opcode!");
@@ -374,12 +376,9 @@ void AArch64InstrInfo::instantiateCondBranch(
}
}
-unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+unsigned AArch64InstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
@@ -485,10 +484,11 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
return Opc;
}
-bool AArch64InstrInfo::canInsertSelect(
- const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
- int &FalseCycles) const {
+bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -656,8 +656,10 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
- BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
- CC);
+ BuildMI(MBB, I, DL, get(Opc), DstReg)
+ .addReg(TrueReg)
+ .addReg(FalseReg)
+ .addImm(CC);
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
@@ -673,8 +675,9 @@ static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
-
- unsigned Imm;
+ if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
+ isExynosShiftLeftFast(MI))
+ return true;
switch (MI.getOpcode()) {
default:
@@ -685,17 +688,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
- MI.getOperand(3).getImm() == 0);
-
- // add/sub on register with shift
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
- AArch64_AM::getArithShiftValue(Imm) < 4);
+ return (MI.getOperand(3).getImm() == 0);
// logical ops on immediate
case AArch64::ANDWri:
@@ -721,24 +714,6 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
case AArch64::ORRXrr:
return true;
- // logical ops on register with shift
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- Imm = MI.getOperand(3).getImm();
- return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
- AArch64_AM::getShiftValue(Imm) < 4 &&
- AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
-
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
// ORRXri, it is as cheap as MOV
case AArch64::MOVi32imm:
@@ -748,6 +723,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
// It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
// feature.
+ case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
return Subtarget.hasZeroCycleZeroing();
@@ -760,6 +736,129 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
+bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
+ unsigned Imm, Shift;
+ AArch64_AM::ShiftExtendType Ext;
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+
+ // WriteI
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ return true;
+
+ // WriteISReg
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ Imm = MI.getOperand(3).getImm();
+ Shift = AArch64_AM::getShiftValue(Imm);
+ Ext = AArch64_AM::getShiftType(Imm);
+ return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
+
+ // WriteIEReg
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrx:
+ case AArch64::ADDSXrx64:
+ case AArch64::ADDWrx:
+ case AArch64::ADDXrx:
+ case AArch64::ADDXrx64:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64:
+ case AArch64::SUBWrx:
+ case AArch64::SUBXrx:
+ case AArch64::SUBXrx64:
+ Imm = MI.getOperand(3).getImm();
+ Shift = AArch64_AM::getArithShiftValue(Imm);
+ Ext = AArch64_AM::getArithExtendType(Imm);
+ return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
+
+ case AArch64::PRFMroW:
+ case AArch64::PRFMroX:
+
+ // WriteLDIdx
+ case AArch64::LDRBBroW:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroW:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRWroW:
+ case AArch64::LDRWroX:
+ case AArch64::LDRXroW:
+ case AArch64::LDRXroX:
+
+ case AArch64::LDRBroW:
+ case AArch64::LDRBroX:
+ case AArch64::LDRDroW:
+ case AArch64::LDRDroX:
+ case AArch64::LDRHroW:
+ case AArch64::LDRHroX:
+ case AArch64::LDRSroW:
+ case AArch64::LDRSroX:
+
+ // WriteSTIdx
+ case AArch64::STRBBroW:
+ case AArch64::STRBBroX:
+ case AArch64::STRHHroW:
+ case AArch64::STRHHroX:
+ case AArch64::STRWroW:
+ case AArch64::STRWroX:
+ case AArch64::STRXroW:
+ case AArch64::STRXroX:
+
+ case AArch64::STRBroW:
+ case AArch64::STRBroX:
+ case AArch64::STRDroW:
+ case AArch64::STRDroX:
+ case AArch64::STRHroW:
+ case AArch64::STRHroX:
+ case AArch64::STRSroW:
+ case AArch64::STRSroX:
+ Imm = MI.getOperand(3).getImm();
+ Ext = AArch64_AM::getMemExtendType(Imm);
+ return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
+ }
+}
+
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
@@ -1084,11 +1183,7 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
}
}
-enum AccessKind {
- AK_Write = 0x01,
- AK_Read = 0x10,
- AK_All = 0x11
-};
+enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
@@ -1117,21 +1212,24 @@ static bool areCFlagsAccessedBetweenInstrs(
for (--To; To != From; --To) {
const MachineInstr &Instr = *To;
- if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
- ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
+ if (((AccessToCheck & AK_Write) &&
+ Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
+ ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
return true;
}
return false;
}
/// Try to optimize a compare instruction. A compare instruction is an
-/// instruction which produces AArch64::NZCV. It can be truly compare instruction
+/// instruction which produces AArch64::NZCV. It can be truly a compare
+/// instruction
/// when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
-/// condition code or an instruction which can be converted into such an instruction.
+/// condition code or an instruction which can be converted into such an
+/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
@@ -1193,20 +1291,34 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSXri:
return Instr.getOpcode();
- case AArch64::ADDWrr: return AArch64::ADDSWrr;
- case AArch64::ADDWri: return AArch64::ADDSWri;
- case AArch64::ADDXrr: return AArch64::ADDSXrr;
- case AArch64::ADDXri: return AArch64::ADDSXri;
- case AArch64::ADCWr: return AArch64::ADCSWr;
- case AArch64::ADCXr: return AArch64::ADCSXr;
- case AArch64::SUBWrr: return AArch64::SUBSWrr;
- case AArch64::SUBWri: return AArch64::SUBSWri;
- case AArch64::SUBXrr: return AArch64::SUBSXrr;
- case AArch64::SUBXri: return AArch64::SUBSXri;
- case AArch64::SBCWr: return AArch64::SBCSWr;
- case AArch64::SBCXr: return AArch64::SBCSXr;
- case AArch64::ANDWri: return AArch64::ANDSWri;
- case AArch64::ANDXri: return AArch64::ANDSXri;
+ case AArch64::ADDWrr:
+ return AArch64::ADDSWrr;
+ case AArch64::ADDWri:
+ return AArch64::ADDSWri;
+ case AArch64::ADDXrr:
+ return AArch64::ADDSXrr;
+ case AArch64::ADDXri:
+ return AArch64::ADDSXri;
+ case AArch64::ADCWr:
+ return AArch64::ADCSWr;
+ case AArch64::ADCXr:
+ return AArch64::ADCSXr;
+ case AArch64::SUBWrr:
+ return AArch64::SUBSWrr;
+ case AArch64::SUBWri:
+ return AArch64::SUBSWri;
+ case AArch64::SUBXrr:
+ return AArch64::SUBSXrr;
+ case AArch64::SUBXri:
+ return AArch64::SUBSXri;
+ case AArch64::SBCWr:
+ return AArch64::SBCSWr;
+ case AArch64::SBCXr:
+ return AArch64::SBCSXr;
+ case AArch64::ANDWri:
+ return AArch64::ANDSWri;
+ case AArch64::ANDXri:
+ return AArch64::ANDSXri;
}
}
@@ -1228,7 +1340,7 @@ struct UsedNZCV {
UsedNZCV() = default;
- UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
+ UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
this->C |= UsedFlags.C;
@@ -1244,29 +1356,29 @@ struct UsedNZCV {
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
switch (Instr.getOpcode()) {
- default:
- return AArch64CC::Invalid;
+ default:
+ return AArch64CC::Invalid;
- case AArch64::Bcc: {
- int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
- assert(Idx >= 2);
- return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
- }
+ case AArch64::Bcc: {
+ int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+ assert(Idx >= 2);
+ return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
+ }
- case AArch64::CSINVWr:
- case AArch64::CSINVXr:
- case AArch64::CSINCWr:
- case AArch64::CSINCXr:
- case AArch64::CSELWr:
- case AArch64::CSELXr:
- case AArch64::CSNEGWr:
- case AArch64::CSNEGXr:
- case AArch64::FCSELSrrr:
- case AArch64::FCSELDrrr: {
- int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
- assert(Idx >= 1);
- return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
- }
+ case AArch64::CSINVWr:
+ case AArch64::CSINVXr:
+ case AArch64::CSINCWr:
+ case AArch64::CSINCXr:
+ case AArch64::CSELWr:
+ case AArch64::CSELXr:
+ case AArch64::CSNEGWr:
+ case AArch64::CSNEGXr:
+ case AArch64::FCSELSrrr:
+ case AArch64::FCSELDrrr: {
+ int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
+ assert(Idx >= 1);
+ return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
+ }
}
}
@@ -1274,42 +1386,42 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
assert(CC != AArch64CC::Invalid);
UsedNZCV UsedFlags;
switch (CC) {
- default:
- break;
+ default:
+ break;
- case AArch64CC::EQ: // Z set
- case AArch64CC::NE: // Z clear
- UsedFlags.Z = true;
- break;
+ case AArch64CC::EQ: // Z set
+ case AArch64CC::NE: // Z clear
+ UsedFlags.Z = true;
+ break;
- case AArch64CC::HI: // Z clear and C set
- case AArch64CC::LS: // Z set or C clear
- UsedFlags.Z = true;
- LLVM_FALLTHROUGH;
- case AArch64CC::HS: // C set
- case AArch64CC::LO: // C clear
- UsedFlags.C = true;
- break;
+ case AArch64CC::HI: // Z clear and C set
+ case AArch64CC::LS: // Z set or C clear
+ UsedFlags.Z = true;
+ LLVM_FALLTHROUGH;
+ case AArch64CC::HS: // C set
+ case AArch64CC::LO: // C clear
+ UsedFlags.C = true;
+ break;
- case AArch64CC::MI: // N set
- case AArch64CC::PL: // N clear
- UsedFlags.N = true;
- break;
+ case AArch64CC::MI: // N set
+ case AArch64CC::PL: // N clear
+ UsedFlags.N = true;
+ break;
- case AArch64CC::VS: // V set
- case AArch64CC::VC: // V clear
- UsedFlags.V = true;
- break;
+ case AArch64CC::VS: // V set
+ case AArch64CC::VC: // V clear
+ UsedFlags.V = true;
+ break;
- case AArch64CC::GT: // Z clear, N and V the same
- case AArch64CC::LE: // Z set, N and V differ
- UsedFlags.Z = true;
- LLVM_FALLTHROUGH;
- case AArch64CC::GE: // N and V the same
- case AArch64CC::LT: // N and V differ
- UsedFlags.N = true;
- UsedFlags.V = true;
- break;
+ case AArch64CC::GT: // Z clear, N and V the same
+ case AArch64CC::LE: // Z set, N and V differ
+ UsedFlags.Z = true;
+ LLVM_FALLTHROUGH;
+ case AArch64CC::GE: // N and V the same
+ case AArch64CC::LT: // N and V differ
+ UsedFlags.N = true;
+ UsedFlags.V = true;
+ break;
}
return UsedFlags;
}
@@ -1334,7 +1446,7 @@ static bool isSUBSRegImm(unsigned Opcode) {
/// nor uses of flags between MI and CmpInstr.
/// - and C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI) {
assert(MI);
assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
assert(CmpInstr);
@@ -1356,7 +1468,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
return false;
UsedNZCV NZCVUsedAfterCmp;
- for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
+ for (auto I = std::next(CmpInstr->getIterator()),
+ E = CmpInstr->getParent()->instr_end();
I != E; ++I) {
const MachineInstr &Instr = *I;
if (Instr.readsRegister(AArch64::NZCV, TRI)) {
@@ -1369,7 +1482,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
if (Instr.modifiesRegister(AArch64::NZCV, TRI))
break;
}
-
+
return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}
@@ -1427,16 +1540,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Large) {
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
+ .addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
+ .addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
+ .addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3)
+ .addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1818,7 +1935,7 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
} else
return false;
- // Get the scaling factor for the instruction and set the width for the
+ // Get the scaling factor for the instruction and set the width for the
// instruction.
unsigned Scale = 0;
int64_t Dummy1, Dummy2;
@@ -1841,10 +1958,10 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
return true;
}
-MachineOperand&
+MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
- MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
+ MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
return OfsOp;
}
@@ -1853,7 +1970,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) const {
switch (Opcode) {
- // Not a memory operation or something we want to handle.
+ // Not a memory operation or something we want to handle.
default:
Scale = Width = 0;
MinOffset = MaxOffset = 0;
@@ -2050,8 +2167,13 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
+ unsigned BaseReg1,
MachineInstr &SecondLdSt,
+ unsigned BaseReg2,
unsigned NumLoads) const {
+ if (BaseReg1 != BaseReg2)
+ return false;
+
// Only cluster up to a single pair.
if (NumLoads > 1)
return false;
@@ -2089,18 +2211,6 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
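The extra BaseReg1/BaseReg2 parameters make the clustering decision explicit: only accesses off the same base register, forming at most a single pair, with immediately adjacent (already-scaled) offsets may be clustered. A reduced sketch of that predicate (illustrative only; it omits the opcode-pairing check the real code also performs):

#include <cstdint>

static bool canClusterSketch(unsigned Base1, int64_t Off1,
                             unsigned Base2, int64_t Off2,
                             unsigned NumLoads) {
  if (Base1 != Base2)
    return false;          // different base registers never cluster
  if (NumLoads > 1)
    return false;          // only cluster up to a single pair
  return Off1 + 1 == Off2; // offsets must be consecutive in scaled units
}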
-MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
- MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
- const MDNode *Expr, const DebugLoc &DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
- .addFrameIndex(FrameIx)
- .addImm(0)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
- return &*MIB;
-}
-
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
unsigned Reg, unsigned SubIdx,
unsigned State,
@@ -2120,12 +2230,13 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
-void AArch64InstrInfo::copyPhysRegTuple(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
- ArrayRef<unsigned> Indices) const {
- assert(Subtarget.hasNEON() &&
- "Unexpected register copy without NEON");
+void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc,
+ unsigned Opcode,
+ ArrayRef<unsigned> Indices) const {
+ assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
@@ -2178,8 +2289,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
@@ -2214,8 +2326,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
// Otherwise, expand to ORR XZR.
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
@@ -2228,8 +2341,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a DDDD register quad by copying the individual sub-registers.
if (AArch64::DDDDRegClass.contains(DestReg) &&
AArch64::DDDDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3 };
+ static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
@@ -2238,8 +2351,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a DDD register triple by copying the individual sub-registers.
if (AArch64::DDDRegClass.contains(DestReg) &&
AArch64::DDDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2 };
+ static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
@@ -2248,7 +2361,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a DD register pair by copying the individual sub-registers.
if (AArch64::DDRegClass.contains(DestReg) &&
AArch64::DDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
+ static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
@@ -2257,8 +2370,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a QQQQ register quad by copying the individual sub-registers.
if (AArch64::QQQQRegClass.contains(DestReg) &&
AArch64::QQQQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
+ static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
@@ -2267,8 +2380,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a QQQ register triple by copying the individual sub-registers.
if (AArch64::QQQRegClass.contains(DestReg) &&
AArch64::QQQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2 };
+ static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
@@ -2277,7 +2390,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a QQ register pair by copying the individual sub-registers.
if (AArch64::QQRegClass.contains(DestReg) &&
AArch64::QQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
+ static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
@@ -2285,28 +2398,28 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if(Subtarget.hasNEON()) {
+ if (Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addReg(AArch64::SP)
- .addImm(-16);
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(AArch64::SP)
+ .addImm(-16);
BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(DestReg, RegState::Define)
- .addReg(AArch64::SP)
- .addImm(16);
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(DestReg, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
}
return;
}
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if(Subtarget.hasNEON()) {
+ if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
@@ -2323,7 +2436,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if(Subtarget.hasNEON()) {
+ if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
@@ -2340,7 +2453,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if(Subtarget.hasNEON()) {
+ if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
@@ -2361,7 +2474,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if(Subtarget.hasNEON()) {
+ if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
@@ -2410,17 +2523,17 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MSR))
- .addImm(AArch64SysReg::NZCV)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
return;
}
if (SrcReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
- .addImm(AArch64SysReg::NZCV)
- .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
return;
}
@@ -2476,45 +2589,39 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d;
Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register store without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
}
@@ -2523,8 +2630,8 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Opc && "Unknown register class");
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI);
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI);
if (Offset)
MI.addImm(0);
@@ -2580,45 +2687,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d;
Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasNEON() &&
- "Unexpected register load without NEON");
+ assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
}
@@ -2627,8 +2728,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Opc && "Unknown register class");
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
- .addReg(DestReg, getDefRegState(true))
- .addFrameIndex(FI);
+ .addReg(DestReg, getDefRegState(true))
+ .addFrameIndex(FI);
if (Offset)
MI.addImm(0);
MI.addMemOperand(MMO);
@@ -2701,14 +2802,14 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
LiveIntervals *LIS) const {
// This is a bit of a hack. Consider this instruction:
//
- // %vreg0<def> = COPY %SP; GPR64all:%vreg0
+ // %0 = COPY %sp; GPR64all:%0
//
// We explicitly chose GPR64all for the virtual register so such a copy might
// be eliminated by RegisterCoalescer. However, that may not be possible, and
- // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
+ // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
//
- // To prevent that, we are going to constrain the %vreg0 register class here.
+ // To prevent that, we are going to constrain the %0 register class here.
//
// <rdar://problem/11522048>
//
@@ -2730,26 +2831,26 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
- // %vreg0<def> = COPY %XZR; GPR64common:%vreg0
+ // %0 = COPY %xzr; GPR64common:%0
//
// In this case we can still safely fold away the COPY and generate the
// following spill code:
//
- // STRXui %XZR, <fi#0>
+ // STRXui %xzr, %stack.0
//
// This also eliminates spilled cross register class COPYs (e.g. between x and
// d regs) of the same size. For example:
//
- // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
+ // %0 = COPY %1; GPR64:%0, FPR64:%1
//
// will be filled as
//
- // LDRDui %vreg0, fi<#0>
+ // LDRDui %0, fi<#0>
//
// instead of
//
- // LDRXui %vregTemp, fi<#0>
- // %vreg0 = FMOV %vregTemp
+ // LDRXui %Temp, fi<#0>
+ // %0 = FMOV %Temp
//
if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
@@ -2773,7 +2874,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
- TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
+ TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
"Mismatched register size in non subreg COPY");
if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
@@ -2786,12 +2887,12 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// Handle cases like spilling def of:
//
- // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
//
// where the physical register source can be widened and stored to the full
// virtual reg destination stack slot, in this case producing:
//
- // STRXui %XZR, <fi#0>
+ // STRXui %xzr, %stack.0
//
if (IsSpill && DstMO.isUndef() &&
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
@@ -2834,12 +2935,12 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// Handle cases like filling use of:
//
- // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
//
// where we can load the full virtual reg source stack slot, into the subreg
// destination, in this case producing:
//
- // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ // LDRWui %0:sub_32<def,read-undef>, %stack.0
//
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
const TargetRegisterClass *FillRC;
@@ -3156,10 +3257,7 @@ void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
}
// AArch64 supports MachineCombiner.
-bool AArch64InstrInfo::useMachineCombiner() const {
-
- return true;
-}
+bool AArch64InstrInfo::useMachineCombiner() const { return true; }
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
@@ -3293,7 +3391,8 @@ static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
-bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+bool AArch64InstrInfo::isAssociativeAndCommutative(
+ const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
case AArch64::FADDDrr:
case AArch64::FADDSrr:
@@ -3574,6 +3673,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv2f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3585,6 +3693,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv2f64:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2i64_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv2f64)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv2i64_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3596,6 +3713,15 @@ static bool getFMAPatterns(MachineInstr &Root,
}
break;
case AArch64::FSUBv4f32:
+ if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4i32_indexed)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+ Found = true;
+ } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+ AArch64::FMULv4f32)) {
+ Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+ Found = true;
+ }
if (canCombineWithFMUL(MBB, Root.getOperand(2),
AArch64::FMULv4i32_indexed)) {
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
@@ -3613,8 +3739,8 @@ static bool getFMAPatterns(MachineInstr &Root,
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
-bool
-AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+bool AArch64InstrInfo::isThroughputPattern(
+ MachineCombinerPattern Pattern) const {
switch (Pattern) {
default:
break;
@@ -3692,12 +3818,15 @@ enum class FMAInstKind { Default, Indexed, Accumulator };
/// \param MaddOpc the opcode fo the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
+/// \param ReplacedAddend is the result register from the instruction
+/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
- FMAInstKind kind = FMAInstKind::Default) {
+ FMAInstKind kind = FMAInstKind::Default,
+ const unsigned *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
@@ -3707,8 +3836,17 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
bool Src0IsKill = MUL->getOperand(1).isKill();
unsigned SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
- unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
- bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+ unsigned SrcReg2;
+ bool Src2IsKill;
+ if (ReplacedAddend) {
+ // If we just generated a new addend, we must be its only use.
+ SrcReg2 = *ReplacedAddend;
+ Src2IsKill = true;
+ } else {
+ SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
+ Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+ }
if (TargetRegisterInfo::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
@@ -3765,8 +3903,8 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
- unsigned IdxMulOpd, unsigned MaddOpc,
- unsigned VR, const TargetRegisterClass *RC) {
+ unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
+ const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
@@ -3785,11 +3923,11 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
if (TargetRegisterInfo::isVirtualRegister(VR))
MRI.constrainRegClass(VR, RC);
- MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
- ResultReg)
- .addReg(SrcReg0, getKillRegState(Src0IsKill))
- .addReg(SrcReg1, getKillRegState(Src1IsKill))
- .addReg(VR);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(VR);
// Insert the MADD
InsInstrs.push_back(MIB);
return MUL;
@@ -4228,6 +4366,66 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
FMAInstKind::Accumulator);
}
break;
+ case MachineCombinerPattern::FMLSv2f32_OP1:
+ case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
+ RC = &AArch64::FPR64RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
+ Opc = AArch64::FMLAv2i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv4f32_OP1:
+ case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
+ Opc = AArch64::FMLAv4i32_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv4f32;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
+ case MachineCombinerPattern::FMLSv2f64_OP1:
+ case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
+ RC = &AArch64::FPR128RegClass;
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
+ .add(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
+ Opc = AArch64::FMLAv2i64_indexed;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Indexed, &NewVR);
+ } else {
+ Opc = AArch64::FMLAv2f64;
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
+ FMAInstKind::Accumulator, &NewVR);
+ }
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
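The new FMLS*_OP1 cases above handle the multiply feeding the first operand of the subtract. Since (a * b) - c equals a fused multiply-add of a and b with a negated addend, the combiner first emits an FNEG of the addend into NewVR and then an FMLA that accumulates onto that negated value. A scalar sketch of the identity (illustrative only; the fused form simply avoids the intermediate rounding):

#include <cmath>

float fused_sub_op1(float a, float b, float c) {
  // (a * b) - c rewritten as a fused multiply-add with a negated addend.
  return std::fma(a, b, -c);
}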
@@ -4419,12 +4617,9 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_PAGE, "aarch64-page"},
- {MO_PAGEOFF, "aarch64-pageoff"},
- {MO_G3, "aarch64-g3"},
- {MO_G2, "aarch64-g2"},
- {MO_G1, "aarch64-g1"},
- {MO_G0, "aarch64-g0"},
+ {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
+ {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
+ {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
{MO_HI12, "aarch64-hi12"}};
return makeArrayRef(TargetFlags);
}
@@ -4434,9 +4629,7 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "aarch64-got"},
- {MO_NC, "aarch64-nc"},
- {MO_TLS, "aarch64-tls"}};
+ {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
return makeArrayRef(TargetFlags);
}
@@ -4448,30 +4641,148 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
-unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
- size_t Occurrences,
- bool CanBeTailCall) const {
- unsigned NotOutlinedSize = SequenceSize * Occurrences;
- unsigned OutlinedSize;
-
- // Is this candidate something we can outline as a tail call?
- if (CanBeTailCall) {
- // If yes, then we just outline the sequence and replace each of its
- // occurrences with a branch instruction.
- OutlinedSize = SequenceSize + Occurrences;
- } else {
- // If no, then we outline the sequence (SequenceSize), add a return (+1),
- // and replace each occurrence with a save/restore to LR and a call
- // (3 * Occurrences)
- OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+ /// Constants defining how certain sequences should be outlined.
+ /// This encompasses how an outlined function should be called, and what kind of
+ /// frame should be emitted for that outlined function.
+ ///
+ /// \p MachineOutlinerDefault implies that the function should be called with
+ /// a save and restore of LR to the stack.
+ ///
+ /// That is,
+ ///
+ /// I1 Save LR OUTLINED_FUNCTION:
+ /// I2 --> BL OUTLINED_FUNCTION I1
+ /// I3 Restore LR I2
+ /// I3
+ /// RET
+ ///
+ /// * Call construction overhead: 3 (save + BL + restore)
+ /// * Frame construction overhead: 1 (ret)
+ /// * Requires stack fixups? Yes
+ ///
+ /// \p MachineOutlinerTailCall implies that the function is being created from
+ /// a sequence of instructions ending in a return.
+ ///
+ /// That is,
+ ///
+ /// I1 OUTLINED_FUNCTION:
+ /// I2 --> B OUTLINED_FUNCTION I1
+ /// RET I2
+ /// RET
+ ///
+ /// * Call construction overhead: 1 (B)
+ /// * Frame construction overhead: 0 (Return included in sequence)
+ /// * Requires stack fixups? No
+ ///
+ /// \p MachineOutlinerNoLRSave implies that the function should be called using
+ /// a BL instruction, but doesn't require LR to be saved and restored. This
+ /// happens when LR is known to be dead.
+ ///
+ /// That is,
+ ///
+ /// I1 OUTLINED_FUNCTION:
+ /// I2 --> BL OUTLINED_FUNCTION I1
+ /// I3 I2
+ /// I3
+ /// RET
+ ///
+ /// * Call construction overhead: 1 (BL)
+ /// * Frame construction overhead: 1 (RET)
+ /// * Requires stack fixups? No
+ ///
+enum MachineOutlinerClass {
+ MachineOutlinerDefault, /// Emit a save, restore, call, and return.
+ MachineOutlinerTailCall, /// Only emit a branch.
+ MachineOutlinerNoLRSave /// Emit a call and return.
+};
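Taken together, the overheads documented above determine when outlining actually shrinks the code. A rough sketch of that size calculation (illustrative only; the function name is hypothetical and this is not the in-tree cost model):

static unsigned instrsSavedSketch(unsigned SeqLen, unsigned Count,
                                  MachineOutlinerClass C) {
  // Per-call and one-off frame overheads, as documented above:
  // Default = 3/1, TailCall = 1/0, NoLRSave = 1/1.
  unsigned CallOverhead = (C == MachineOutlinerDefault) ? 3 : 1;
  unsigned FrameOverhead = (C == MachineOutlinerTailCall) ? 0 : 1;
  unsigned NotOutlined = SeqLen * Count;
  unsigned Outlined = SeqLen + FrameOverhead + CallOverhead * Count;
  return NotOutlined > Outlined ? NotOutlined - Outlined : 0;
}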
+
+bool AArch64InstrInfo::canOutlineWithoutLRSave(
+ MachineBasicBlock::iterator &CallInsertionPt) const {
+ // Was LR saved in the function containing this basic block?
+ MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
+ LiveRegUnits LRU(getRegisterInfo());
+ LRU.addLiveOuts(MBB);
+
+ // Get liveness information from the end of the block to the end of the
+ // prospective outlined region.
+ std::for_each(MBB.rbegin(),
+ (MachineBasicBlock::reverse_iterator)CallInsertionPt,
+ [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
+
+ // If the link register is available at this point, then we can safely outline
+ // the region without saving/restoring LR. Otherwise, we must emit a save and
+ // restore.
+ return LRU.available(AArch64::LR);
+}
+
+AArch64GenInstrInfo::MachineOutlinerInfo
+AArch64InstrInfo::getOutlininingCandidateInfo(
+ std::vector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ &RepeatedSequenceLocs) const {
+
+ unsigned CallID = MachineOutlinerDefault;
+ unsigned FrameID = MachineOutlinerDefault;
+ unsigned NumInstrsForCall = 3;
+ unsigned NumInstrsToCreateFrame = 1;
+
+ auto DoesntNeedLRSave =
+ [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
+ &I) { return canOutlineWithoutLRSave(I.second); };
+
+ // If the last instruction in any candidate is a terminator, then we should
+ // tail call all of the candidates.
+ if (RepeatedSequenceLocs[0].second->isTerminator()) {
+ CallID = MachineOutlinerTailCall;
+ FrameID = MachineOutlinerTailCall;
+ NumInstrsForCall = 1;
+ NumInstrsToCreateFrame = 0;
+ }
+
+ else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ DoesntNeedLRSave)) {
+ CallID = MachineOutlinerNoLRSave;
+ FrameID = MachineOutlinerNoLRSave;
+ NumInstrsForCall = 1;
+ NumInstrsToCreateFrame = 1;
}
- // Return the number of instructions saved by outlining this sequence.
- return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+ // Check if the range contains a call. These require a save + restore of the
+ // link register.
+ if (std::any_of(RepeatedSequenceLocs[0].first, RepeatedSequenceLocs[0].second,
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ NumInstrsToCreateFrame += 2; // Save + restore the link register.
+
+ // Handle the last instruction separately. If this is a tail call, then the
+ // last instruction is a call. We don't want to save + restore in this case.
+ // However, it could be possible that the last instruction is a call without
+ // it being valid to tail call this sequence. We should consider this as well.
+ else if (RepeatedSequenceLocs[0].second->isCall() &&
+ FrameID != MachineOutlinerTailCall)
+ NumInstrsToCreateFrame += 2;
+
+ return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
+ FrameID);
}
-bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
- return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
+ MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+ const Function &F = MF.getFunction();
+
+ // If F uses a redzone, then don't outline from it because it might mess up
+ // the stack.
+ if (!F.hasFnAttribute(Attribute::NoRedZone))
+ return false;
+
+ // If anyone is using the address of this function, don't outline from it.
+ if (F.hasAddressTaken())
+ return false;
+
+ // Can F be deduplicated by the linker? If it can, don't outline from it.
+ if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+ return false;
+
+ return true;
}
AArch64GenInstrInfo::MachineOutlinerInstrType
@@ -4493,54 +4804,121 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
// Is this the end of a function?
if (MI.getParent()->succ_empty())
- return MachineOutlinerInstrType::Legal;
+ return MachineOutlinerInstrType::Legal;
// It's not, so don't outline it.
return MachineOutlinerInstrType::Illegal;
}
+ // Outline calls without stack parameters or aggregate parameters.
+ if (MI.isCall()) {
+ const Module *M = MF->getFunction().getParent();
+ assert(M && "No module?");
+
+ // Get the function associated with the call. Look at each operand and find
+ // the one that represents the callee and get its name.
+ Function *Callee = nullptr;
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isSymbol()) {
+ Callee = M->getFunction(MOP.getSymbolName());
+ break;
+ }
+
+ else if (MOP.isGlobal()) {
+ Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
+ break;
+ }
+ }
+
+ // Only handle functions that we have information about.
+ if (!Callee)
+ return MachineOutlinerInstrType::Illegal;
+
+ // We have a function we have information about. Check if it's something we
+ // can safely outline.
+
+ // If the callee is vararg, it passes parameters on the stack. Don't touch
+ // it.
+ // FIXME: Functions like printf are very common and we should be able to
+ // outline them.
+ if (Callee->isVarArg())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Check if any of the arguments are a pointer to a struct. We don't want
+ // to outline these since they might be loaded in two instructions.
+ for (Argument &Arg : Callee->args()) {
+ if (Arg.getType()->isPointerTy() &&
+ Arg.getType()->getPointerElementType()->isAggregateType())
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // If the thing we're calling doesn't access memory at all, then we're good
+ // to go.
+ if (Callee->doesNotAccessMemory())
+ return MachineOutlinerInstrType::Legal;
+
+ // It accesses memory. Get the machine function for the callee to see if
+ // it's safe to outline.
+ MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
+
+ // We don't know what's going on with the callee at all. Don't touch it.
+ if (!CalleeMF)
+ return MachineOutlinerInstrType::Illegal;
+
+ // Does it pass anything on the stack? If it does, don't outline it.
+ if (CalleeMF->getInfo<AArch64FunctionInfo>()->getBytesInStackArgArea() != 0)
+ return MachineOutlinerInstrType::Illegal;
+
+ // It doesn't, so it's safe to outline and we're done.
+ return MachineOutlinerInstrType::Legal;
+ }
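In short, a call is only outlinable when the callee is known, takes no variadic or aggregate-pointer parameters, and passes nothing on the stack. Hypothetical callee signatures illustrating the checks above (illustrative only; none of these names exist in the tree):

struct Pair { int a, b; };

int leaf(int x, int y);              // OK: register arguments only
int takes_aggregate_ptr(Pair *p);    // rejected: pointer to an aggregate type
int printf_like(const char *f, ...); // rejected: vararg, may pass on the stack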
+
// Don't outline positions.
if (MI.isPosition())
return MachineOutlinerInstrType::Illegal;
+ // Don't touch the link register or W30.
+ if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
+ MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
+ return MachineOutlinerInstrType::Illegal;
+
// Make sure none of the operands are un-outlinable.
- for (const MachineOperand &MOP : MI.operands())
+ for (const MachineOperand &MOP : MI.operands()) {
if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
MOP.isTargetIndex())
return MachineOutlinerInstrType::Illegal;
- // Don't outline anything that uses the link register.
- if (MI.modifiesRegister(AArch64::LR, &RI) ||
- MI.readsRegister(AArch64::LR, &RI))
+ // Don't outline anything that uses the link register.
+ if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
return MachineOutlinerInstrType::Illegal;
+ }
// Does this use the stack?
if (MI.modifiesRegister(AArch64::SP, &RI) ||
MI.readsRegister(AArch64::SP, &RI)) {
- // Is it a memory operation?
if (MI.mayLoadOrStore()) {
- unsigned Base; // Filled with the base register of MI.
+ unsigned Base; // Filled with the base register of MI.
int64_t Offset; // Filled with the offset of MI.
unsigned DummyWidth;
// Does it allow us to offset the base register and is the base SP?
if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
- Base != AArch64::SP)
+ Base != AArch64::SP)
return MachineOutlinerInstrType::Illegal;
// Find the minimum/maximum offset for this instruction and check if
// fixing it up would be in range.
- int64_t MinOffset, MaxOffset;
- unsigned DummyScale;
- getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
- MaxOffset);
+ int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
+ unsigned Scale; // The scale to multiply the offsets by.
+ getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
// TODO: We should really test what happens if an instruction overflows.
// This is tricky to test with IR tests, but when the outliner is moved
// to a MIR test, it really ought to be checked.
- if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
- return MachineOutlinerInstrType::Illegal;
+ Offset += 16; // Update the offset to what it would be if we outlined.
+ if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ return MachineOutlinerInstrType::Illegal;
// It's in range, so we can outline it.
return MachineOutlinerInstrType::Legal;
@@ -4576,17 +4954,57 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
// We've pushed the return address to the stack, so add 16 to the offset.
// This is safe, since we already checked if it would overflow when we
// checked if this instruction was legal to outline.
- int64_t NewImm = (Offset + 16)/Scale;
+ int64_t NewImm = (Offset + 16) / Scale;
StackOffsetOperand.setImm(NewImm);
}
}
-void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
- MachineFunction &MF,
- bool IsTailCall) const {
+void AArch64InstrInfo::insertOutlinerEpilogue(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const MachineOutlinerInfo &MInfo) const {
+
+ bool ContainsCalls = false;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isCall()) {
+ ContainsCalls = true;
+ break;
+ }
+ }
+
+ if (ContainsCalls) {
+ // Fix up the instructions in the range, since we're going to modify the
+ // stack.
+ fixupPostOutline(MBB);
+
+ // LR has to be a live in so that we can save it.
+ MBB.addLiveIn(AArch64::LR);
+
+ MachineBasicBlock::iterator It = MBB.begin();
+ MachineBasicBlock::iterator Et = MBB.end();
+
+ if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
+ Et = std::prev(MBB.end());
+
+ // Insert a save before the outlined region
+ MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ It = MBB.insert(It, STRXpre);
+
+ // Insert a restore before the terminator for the function.
+ MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ Et = MBB.insert(Et, LDRXpost);
+ }
// If this is a tail call outlined function, then there's already a return.
- if (IsTailCall)
+ if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
return;
// It's not a tail call, so we have to insert the return ourselves.
@@ -4594,29 +5012,40 @@ void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
.addReg(AArch64::LR, RegState::Undef);
MBB.insert(MBB.end(), ret);
+ // Did we have to modify the stack by saving the link register?
+ if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
+ return;
+
+ // We modified the stack.
// Walk over the basic block and fix up all the stack accesses.
fixupPostOutline(MBB);
}
-void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
- MachineFunction &MF,
- bool IsTailCall) const {}
+void AArch64InstrInfo::insertOutlinerPrologue(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const MachineOutlinerInfo &MInfo) const {}
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
- MachineFunction &MF, bool IsTailCall) const {
+ MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {
// Are we tail calling?
- if (IsTailCall) {
+ if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
// If yes, then we can just branch to the label.
- It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(AArch64::B))
- .addGlobalAddress(M.getNamedValue(MF.getName())));
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
return It;
}
- // We're not tail calling, so we have to save LR before the call and restore
- // it after.
+ // Are we saving the link register?
+ if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
+ // No, so just insert the call.
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ return It;
+ }
+
+ // We have a default call. Save the link register.
MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
@@ -4626,20 +5055,18 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
It++;
// Insert the call.
- It = MBB.insert(It,
- BuildMI(MF, DebugLoc(), get(AArch64::BL))
- .addGlobalAddress(M.getNamedValue(MF.getName())));
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
It++;
// Restore the link register.
MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR)
+ .addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
It = MBB.insert(It, LDRXpost);
return It;
-}
-
+}
\ No newline at end of file
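In the outliner hunks above, once LR is spilled with a pre-indexed store, SP sits 16 bytes lower inside the outlined body, so getOutliningType() re-checks each SP-relative access against the instruction's scaled immediate range and fixupPostOutline() rewrites the immediate as (Offset + 16) / Scale. The arithmetic reduces to the standalone sketch below (plain C++, not the LLVM API; the MemOpInfo numbers are hypothetical stand-ins for what getMemOpInfo() reports):

#include <cstdint>
#include <iostream>

// Hypothetical per-opcode limits mirroring what getMemOpInfo() reports:
// MinOffset/MaxOffset are in immediate units and must be scaled to bytes.
struct MemOpInfo {
  int64_t Scale;     // bytes per immediate unit, e.g. 8 for a 64-bit load
  int64_t MinOffset; // smallest encodable immediate
  int64_t MaxOffset; // largest encodable immediate
};

// Can the SP-relative access at `Offset` bytes still be encoded after the
// outliner spills LR with "str x30, [sp, #-16]!"?
bool fitsAfterLRSpill(const MemOpInfo &I, int64_t Offset) {
  Offset += 16; // SP is 16 bytes lower inside the outlined body
  return Offset >= I.MinOffset * I.Scale && Offset <= I.MaxOffset * I.Scale;
}

// The immediate fixupPostOutline() would write back into the instruction.
int64_t fixedUpImmediate(const MemOpInfo &I, int64_t Offset) {
  return (Offset + 16) / I.Scale;
}

int main() {
  MemOpInfo LdrX{8, 0, 4095}; // a scaled 64-bit load/store form
  std::cout << fitsAfterLRSpill(LdrX, 32) << ' '
            << fixedUpImmediate(LdrX, 32) << '\n'; // prints "1 6"
}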
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 1765a0263ea4..2f10bef1e474 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -17,7 +17,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
@@ -136,37 +136,88 @@ public:
default:
llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
- case AArch64::ADDWri: Is64Bit = false; return AArch64::ADDSWri;
- case AArch64::ADDWrr: Is64Bit = false; return AArch64::ADDSWrr;
- case AArch64::ADDWrs: Is64Bit = false; return AArch64::ADDSWrs;
- case AArch64::ADDWrx: Is64Bit = false; return AArch64::ADDSWrx;
- case AArch64::ANDWri: Is64Bit = false; return AArch64::ANDSWri;
- case AArch64::ANDWrr: Is64Bit = false; return AArch64::ANDSWrr;
- case AArch64::ANDWrs: Is64Bit = false; return AArch64::ANDSWrs;
- case AArch64::BICWrr: Is64Bit = false; return AArch64::BICSWrr;
- case AArch64::BICWrs: Is64Bit = false; return AArch64::BICSWrs;
- case AArch64::SUBWri: Is64Bit = false; return AArch64::SUBSWri;
- case AArch64::SUBWrr: Is64Bit = false; return AArch64::SUBSWrr;
- case AArch64::SUBWrs: Is64Bit = false; return AArch64::SUBSWrs;
- case AArch64::SUBWrx: Is64Bit = false; return AArch64::SUBSWrx;
+ case AArch64::ADDWri:
+ Is64Bit = false;
+ return AArch64::ADDSWri;
+ case AArch64::ADDWrr:
+ Is64Bit = false;
+ return AArch64::ADDSWrr;
+ case AArch64::ADDWrs:
+ Is64Bit = false;
+ return AArch64::ADDSWrs;
+ case AArch64::ADDWrx:
+ Is64Bit = false;
+ return AArch64::ADDSWrx;
+ case AArch64::ANDWri:
+ Is64Bit = false;
+ return AArch64::ANDSWri;
+ case AArch64::ANDWrr:
+ Is64Bit = false;
+ return AArch64::ANDSWrr;
+ case AArch64::ANDWrs:
+ Is64Bit = false;
+ return AArch64::ANDSWrs;
+ case AArch64::BICWrr:
+ Is64Bit = false;
+ return AArch64::BICSWrr;
+ case AArch64::BICWrs:
+ Is64Bit = false;
+ return AArch64::BICSWrs;
+ case AArch64::SUBWri:
+ Is64Bit = false;
+ return AArch64::SUBSWri;
+ case AArch64::SUBWrr:
+ Is64Bit = false;
+ return AArch64::SUBSWrr;
+ case AArch64::SUBWrs:
+ Is64Bit = false;
+ return AArch64::SUBSWrs;
+ case AArch64::SUBWrx:
+ Is64Bit = false;
+ return AArch64::SUBSWrx;
// 64-bit cases:
- case AArch64::ADDXri: Is64Bit = true; return AArch64::ADDSXri;
- case AArch64::ADDXrr: Is64Bit = true; return AArch64::ADDSXrr;
- case AArch64::ADDXrs: Is64Bit = true; return AArch64::ADDSXrs;
- case AArch64::ADDXrx: Is64Bit = true; return AArch64::ADDSXrx;
- case AArch64::ANDXri: Is64Bit = true; return AArch64::ANDSXri;
- case AArch64::ANDXrr: Is64Bit = true; return AArch64::ANDSXrr;
- case AArch64::ANDXrs: Is64Bit = true; return AArch64::ANDSXrs;
- case AArch64::BICXrr: Is64Bit = true; return AArch64::BICSXrr;
- case AArch64::BICXrs: Is64Bit = true; return AArch64::BICSXrs;
- case AArch64::SUBXri: Is64Bit = true; return AArch64::SUBSXri;
- case AArch64::SUBXrr: Is64Bit = true; return AArch64::SUBSXrr;
- case AArch64::SUBXrs: Is64Bit = true; return AArch64::SUBSXrs;
- case AArch64::SUBXrx: Is64Bit = true; return AArch64::SUBSXrx;
+ case AArch64::ADDXri:
+ Is64Bit = true;
+ return AArch64::ADDSXri;
+ case AArch64::ADDXrr:
+ Is64Bit = true;
+ return AArch64::ADDSXrr;
+ case AArch64::ADDXrs:
+ Is64Bit = true;
+ return AArch64::ADDSXrs;
+ case AArch64::ADDXrx:
+ Is64Bit = true;
+ return AArch64::ADDSXrx;
+ case AArch64::ANDXri:
+ Is64Bit = true;
+ return AArch64::ANDSXri;
+ case AArch64::ANDXrr:
+ Is64Bit = true;
+ return AArch64::ANDSXrr;
+ case AArch64::ANDXrs:
+ Is64Bit = true;
+ return AArch64::ANDSXrs;
+ case AArch64::BICXrr:
+ Is64Bit = true;
+ return AArch64::BICSXrr;
+ case AArch64::BICXrs:
+ Is64Bit = true;
+ return AArch64::BICSXrs;
+ case AArch64::SUBXri:
+ Is64Bit = true;
+ return AArch64::SUBSXri;
+ case AArch64::SUBXrr:
+ Is64Bit = true;
+ return AArch64::SUBSXrr;
+ case AArch64::SUBXrs:
+ Is64Bit = true;
+ return AArch64::SUBSXrs;
+ case AArch64::SUBXrx:
+ Is64Bit = true;
+ return AArch64::SUBSXrx;
}
}
-
/// Return true if this is a load/store that can be potentially paired/merged.
bool isCandidateToMergeOrPair(MachineInstr &MI) const;
@@ -191,13 +242,10 @@ public:
bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
int64_t &MinOffset, int64_t &MaxOffset) const;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
+ bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
+ MachineInstr &SecondLdSt, unsigned BaseReg2,
unsigned NumLoads) const override;
- MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
- uint64_t Offset, const MDNode *Var,
- const MDNode *Expr,
- const DebugLoc &DL) const;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode,
@@ -275,9 +323,9 @@ public:
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in ``Root``. All potential patterns are
/// listed in the ``Patterns`` array.
- bool getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns)
- const override;
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const override;
/// Return true when Inst is associative and commutative so that it can be
/// reassociated.
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
@@ -302,27 +350,32 @@ public:
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
- bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
- unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
- bool CanBeTailCall) const override;
+ bool
+ canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const;
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+ bool OutlineFromLinkOnceODRs) const override;
+ MachineOutlinerInfo getOutlininingCandidateInfo(
+ std::vector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ &RepeatedSequenceLocs) const override;
AArch64GenInstrInfo::MachineOutlinerInstrType
getOutliningType(MachineInstr &MI) const override;
- void insertOutlinerEpilogue(MachineBasicBlock &MBB,
- MachineFunction &MF,
- bool IsTailCall) const override;
- void insertOutlinerPrologue(MachineBasicBlock &MBB,
- MachineFunction &MF,
- bool isTailCall) const override;
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF,
+ const MachineOutlinerInfo &MInfo) const override;
+ void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF,
+ const MachineOutlinerInfo &MInfo) const override;
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &It,
- MachineFunction &MF,
- bool IsTailCall) const override;
+ MachineBasicBlock::iterator &It, MachineFunction &MF,
+ const MachineOutlinerInfo &MInfo) const override;
+ /// Returns true if the instruction has a shift left that can be executed
+ /// more efficiently.
+ bool isExynosShiftLeftFast(const MachineInstr &MI) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isFalkorShiftExtFast(const MachineInstr &MI) const;
-private:
+private:
/// \brief Sets the offsets on outlined instructions in \p MBB which use SP
/// so that they will be valid post-outlining.
///
@@ -350,8 +403,8 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const AArch64InstrInfo *TII);
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo *TII);
/// \brief Use to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
@@ -375,9 +428,9 @@ enum AArch64FrameOffsetStatus {
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
- bool *OutUseUnscaledOp = nullptr,
- unsigned *OutUnscaledOp = nullptr,
- int *EmittableOffset = nullptr);
+ bool *OutUseUnscaledOp = nullptr,
+ unsigned *OutUnscaledOp = nullptr,
+ int *EmittableOffset = nullptr);
static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
@@ -398,7 +451,9 @@ static inline bool isCondBranchOpcode(int Opc) {
}
}
-static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; }
+static inline bool isIndirectBranchOpcode(int Opc) {
+ return Opc == AArch64::BR;
+}
} // end namespace llvm
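The reflowed switch above belongs to the header's helper that maps each ADD/SUB/AND/BIC opcode to its flag-setting (S-suffixed) form and reports the register width through Is64Bit. A toy illustration of that contract, using made-up enumerators rather than the real AArch64::* opcodes:

#include <iostream>
#include <stdexcept>

// Made-up stand-ins for a few AArch64::* opcode enumerators.
enum Opcode { ADDWri, SUBXrs, ADDSWri, SUBSXrs };

// Return the flag-setting twin of Opc and report whether it is a 64-bit op.
Opcode convertToFlagSetting(Opcode Opc, bool &Is64Bit) {
  switch (Opc) {
  case ADDWri:
    Is64Bit = false;
    return ADDSWri;
  case SUBXrs:
    Is64Bit = true;
    return SUBSXrs;
  default:
    throw std::logic_error("opcode has no flag-setting equivalent");
  }
}

int main() {
  bool Is64Bit = false;
  Opcode S = convertToFlagSetting(SUBXrs, Is64Bit);
  std::cout << (S == SUBSXrs) << ' ' << Is64Bit << '\n'; // prints "1 1"
}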
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 59719978a3a6..79826ca2ed8d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -18,12 +18,16 @@ def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
+ AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
+ AssemblerPredicate<"FeatureDotProd", "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
@@ -42,6 +46,8 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
+def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
+ AssemblerPredicate<"FeatureRCPC", "rcpc">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
@@ -322,11 +328,14 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
- def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">;
- def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">;
+ def ForCodeSize : Predicate<"MF->getFunction().optForSize()">;
+ def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
+ // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
+ def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
}
include "AArch64InstrFormats.td"
+include "SVEInstrFormats.td"
//===----------------------------------------------------------------------===//
@@ -432,6 +441,108 @@ def ISB : CRmSystemI<barrier_op, 0b110, "isb",
[(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
}
+// ARMv8.2 Dot Product
+let Predicates = [HasDotProd] in {
+def UDOT2S : BaseSIMDThreeSameVectorDot<0, 1, "udot", ".2s", ".8b">;
+def SDOT2S : BaseSIMDThreeSameVectorDot<0, 0, "sdot", ".2s", ".8b">;
+def UDOT4S : BaseSIMDThreeSameVectorDot<1, 1, "udot", ".4s", ".16b">;
+def SDOT4S : BaseSIMDThreeSameVectorDot<1, 0, "sdot", ".4s", ".16b">;
+def UDOTIDX2S : BaseSIMDThreeSameVectorDotIndex<0, 1, "udot", ".2s", ".8b", ".4b">;
+def SDOTIDX2S : BaseSIMDThreeSameVectorDotIndex<0, 0, "sdot", ".2s", ".8b", ".4b">;
+def UDOTIDX4S : BaseSIMDThreeSameVectorDotIndex<1, 1, "udot", ".4s", ".16b", ".4b">;
+def SDOTIDX4S : BaseSIMDThreeSameVectorDotIndex<1, 0, "sdot", ".4s", ".16b", ".4b">;
+}
+
+let Predicates = [HasRCPC] in {
+ // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
+ def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
+ def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
+ def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
+ def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
+}
+
+// v8.3a complex add and multiply-accumulate. No predicate here; that is done
+// inside the multiclass as the FP16 versions need different predicates.
+defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
+ "fcmla", null_frag>;
+defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
+ "fcadd", null_frag>;
+defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
+ null_frag>;
+
+let Predicates = [HasV8_3a] in {
+ // v8.3a Pointer Authentication
+ let Uses = [LR], Defs = [LR] in {
+ def PACIAZ : SystemNoOperands<0b000, "paciaz">;
+ def PACIBZ : SystemNoOperands<0b010, "pacibz">;
+ def AUTIAZ : SystemNoOperands<0b100, "autiaz">;
+ def AUTIBZ : SystemNoOperands<0b110, "autibz">;
+ }
+ let Uses = [LR, SP], Defs = [LR] in {
+ def PACIASP : SystemNoOperands<0b001, "paciasp">;
+ def PACIBSP : SystemNoOperands<0b011, "pacibsp">;
+ def AUTIASP : SystemNoOperands<0b101, "autiasp">;
+ def AUTIBSP : SystemNoOperands<0b111, "autibsp">;
+ }
+ let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
+ def PACIA1716 : SystemNoOperands<0b000, "pacia1716">;
+ def PACIB1716 : SystemNoOperands<0b010, "pacib1716">;
+ def AUTIA1716 : SystemNoOperands<0b100, "autia1716">;
+ def AUTIB1716 : SystemNoOperands<0b110, "autib1716">;
+ }
+
+ let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
+ def XPACLRI : SystemNoOperands<0b111, "xpaclri">;
+ }
+
+ multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
+ def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
+ def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
+ def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
+ def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
+ def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
+ def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
+ def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
+ def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
+ }
+
+ defm PAC : SignAuth<0b000, 0b010, "pac">;
+ defm AUT : SignAuth<0b001, 0b011, "aut">;
+
+ def XPACI : SignAuthZero<0b100, 0b00, "xpaci">;
+ def XPACD : SignAuthZero<0b100, 0b01, "xpacd">;
+ def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
+
+ // Combined Instructions
+ def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
+ def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
+ def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
+ def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
+
+ def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
+ def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
+ def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
+ def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
+
+ let isReturn = 1 in {
+ def RETAA : AuthReturn<0b010, 0, "retaa">;
+ def RETAB : AuthReturn<0b010, 1, "retab">;
+ def ERETAA : AuthReturn<0b100, 0, "eretaa">;
+ def ERETAB : AuthReturn<0b100, 1, "eretab">;
+ }
+
+ defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
+ defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
+
+ // v8.3a floating point conversion for javascript
+ let Predicates = [HasV8_3a, HasFPARMv8] in
+ def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+ "fjcvtzs", []> {
+ let Inst{31} = 0;
+ }
+
+} // HasV8_3a
+
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
@@ -468,8 +579,8 @@ let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
@@ -486,10 +597,10 @@ def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
@@ -497,8 +608,8 @@ def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>;
// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
@@ -2039,6 +2150,17 @@ defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend)),
+ (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend)),
+ (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>;
+}
+
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
Instruction STRW, Instruction STRX> {
@@ -2086,7 +2208,7 @@ defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
// Match all store 128 bits width whose type is compatible with FPR128
-let Predicates = [IsLE] in {
+let Predicates = [IsLE, UseSTRQro] in {
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
@@ -2127,11 +2249,11 @@ let AddedComplexity = 19 in {
//---
// (unsigned immediate)
-defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
- [(store GPR64:$Rt,
+defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
+ [(store GPR64z:$Rt,
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
-defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str",
- [(store GPR32:$Rt,
+defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
+ [(store GPR32z:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str",
[(store FPR8:$Rt,
@@ -2147,12 +2269,12 @@ defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str",
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>;
-defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh",
- [(truncstorei16 GPR32:$Rt,
+defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
+ [(truncstorei16 GPR32z:$Rt,
(am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset))]>;
-defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb",
- [(truncstorei8 GPR32:$Rt,
+defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
+ [(truncstorei8 GPR32z:$Rt,
(am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset))]>;
@@ -2590,6 +2712,8 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
+def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
+ Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
@@ -4393,20 +4517,20 @@ def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
-def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
-def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
-def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
@@ -6151,3 +6275,4 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
include "AArch64InstrAtomics.td"
+include "AArch64SVEInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 7e275e4d2f46..c2d3ae31c624 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -20,6 +20,7 @@
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,14 +34,8 @@
#define DEBUG_TYPE "aarch64-isel"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-
using namespace llvm;
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-#error "You shouldn't build this"
-#endif
-
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@@ -53,12 +48,13 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
- bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+ static const char *getName() { return DEBUG_TYPE; }
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
/// the patterns that don't require complex C++.
- bool selectImpl(MachineInstr &I) const;
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
@@ -68,7 +64,33 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- ComplexRendererFn selectArithImmed(MachineOperand &Root) const;
+ ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
+
+ ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
+ unsigned Size) const;
+
+ ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
+ return selectAddrModeUnscaled(Root, 1);
+ }
+ ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
+ return selectAddrModeUnscaled(Root, 2);
+ }
+ ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
+ return selectAddrModeUnscaled(Root, 4);
+ }
+ ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
+ return selectAddrModeUnscaled(Root, 8);
+ }
+ ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
+ return selectAddrModeUnscaled(Root, 16);
+ }
+
+ ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
+ unsigned Size) const;
+ template <int Width>
+ ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
+ return selectAddrModeIndexed(Root, Width / 8);
+ }
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
@@ -321,7 +343,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
const TargetRegisterClass *RC = nullptr;
if (RegBank.getID() == AArch64::FPRRegBankID) {
- if (DstSize <= 32)
+ if (DstSize <= 16)
+ RC = &AArch64::FPR16RegClass;
+ else if (DstSize <= 32)
RC = &AArch64::FPR32RegClass;
else if (DstSize <= 64)
RC = &AArch64::FPR64RegClass;
@@ -513,6 +537,8 @@ bool AArch64InstructionSelector::selectCompareBranch(
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
+ CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
return false;
@@ -583,7 +609,8 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
return true;
}
-bool AArch64InstructionSelector::select(MachineInstr &I) const {
+bool AArch64InstructionSelector::select(MachineInstr &I,
+ CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -592,13 +619,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Opcode = I.getOpcode();
- if (!isPreISelGenericOpcode(I.getOpcode())) {
+ // G_PHI requires same handling as PHI
+ if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- if (Opcode == TargetOpcode::PHI) {
+ if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
const unsigned DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
@@ -623,6 +651,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
}
}
}
+ I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
@@ -639,7 +668,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
- if (selectImpl(I))
+ if (selectImpl(I, CoverageInfo))
return true;
LLT Ty =
@@ -703,8 +732,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
<< " constant on bank: " << RB << ", expected: FPR\n");
return false;
}
+
+ // The case when we have 0.0 is covered by tablegen. Reject it here so we
+ // can be sure tablegen works correctly and isn't rescued by this code.
+ if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
+ return false;
} else {
- if (Ty != s32 && Ty != s64 && Ty != p0) {
+ // s32 and s64 are covered by tablegen.
+ if (Ty != p0) {
DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant, expected: " << s32 << ", " << s64 << ", or "
<< p0 << '\n');
@@ -758,7 +793,55 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
+ case TargetOpcode::G_EXTRACT: {
+ LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ // Larger extracts are vectors, same-size extracts should be something else
+ // by now (either split up or simplified to a COPY).
+ if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
+ return false;
+
+ I.setDesc(TII.get(AArch64::UBFMXri));
+ MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
+ Ty.getSizeInBits() - 1);
+
+ unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
+ TII.get(AArch64::COPY))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(DstReg, 0, AArch64::sub_32);
+ RBI.constrainGenericRegister(I.getOperand(0).getReg(),
+ AArch64::GPR32RegClass, MRI);
+ I.getOperand(0).setReg(DstReg);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+ case TargetOpcode::G_INSERT: {
+ LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
+ // Larger inserts are vectors, same-size ones should be something else by
+ // now (split up or turned into COPYs).
+ if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
+ return false;
+
+ I.setDesc(TII.get(AArch64::BFMXri));
+ unsigned LSB = I.getOperand(3).getImm();
+ unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+ I.getOperand(3).setImm((64 - LSB) % 64);
+ MachineInstrBuilder(MF, I).addImm(Width - 1);
+
+ unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
+ TII.get(AArch64::SUBREG_TO_REG))
+ .addDef(SrcReg)
+ .addImm(0)
+ .addUse(I.getOperand(2).getReg())
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(I.getOperand(2).getReg(),
+ AArch64::GPR32RegClass, MRI);
+ I.getOperand(2).setReg(SrcReg);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_FRAME_INDEX: {
// allocas and G_FRAME_INDEX are only supported in addrspace(0).
if (Ty != LLT::pointer(0, 64)) {
@@ -766,7 +849,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
-
I.setDesc(TII.get(AArch64::ADDXri));
// MOs for a #0 shifted immediate.
@@ -1117,62 +1199,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
case TargetOpcode::G_INTTOPTR:
- case TargetOpcode::G_BITCAST:
+ // The importer is currently unable to import pointer types since they
+ // didn't exist in SelectionDAG.
return selectCopy(I, TII, MRI, TRI, RBI);
- case TargetOpcode::G_FPEXT: {
- if (MRI.getType(I.getOperand(0).getReg()) != LLT::scalar(64)) {
- DEBUG(dbgs() << "G_FPEXT to type " << Ty
- << ", expected: " << LLT::scalar(64) << '\n');
- return false;
- }
-
- if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(32)) {
- DEBUG(dbgs() << "G_FPEXT from type " << Ty
- << ", expected: " << LLT::scalar(32) << '\n');
- return false;
- }
-
- const unsigned DefReg = I.getOperand(0).getReg();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
-
- if (RB.getID() != AArch64::FPRRegBankID) {
- DEBUG(dbgs() << "G_FPEXT on bank: " << RB << ", expected: FPR\n");
- return false;
- }
-
- I.setDesc(TII.get(AArch64::FCVTDSr));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-
- return true;
- }
-
- case TargetOpcode::G_FPTRUNC: {
- if (MRI.getType(I.getOperand(0).getReg()) != LLT::scalar(32)) {
- DEBUG(dbgs() << "G_FPTRUNC to type " << Ty
- << ", expected: " << LLT::scalar(32) << '\n');
- return false;
- }
-
- if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(64)) {
- DEBUG(dbgs() << "G_FPTRUNC from type " << Ty
- << ", expected: " << LLT::scalar(64) << '\n');
- return false;
- }
-
- const unsigned DefReg = I.getOperand(0).getReg();
- const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
-
- if (RB.getID() != AArch64::FPRRegBankID) {
- DEBUG(dbgs() << "G_FPTRUNC on bank: " << RB << ", expected: FPR\n");
- return false;
- }
-
- I.setDesc(TII.get(AArch64::FCVTSDr));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-
- return true;
- }
+ case TargetOpcode::G_BITCAST:
+ // Imported SelectionDAG rules can handle every bitcast except those that
+ // bitcast from a type to the same type. Ideally, these shouldn't occur
+ // but we might not run an optimizer that deletes them.
+ if (MRI.getType(I.getOperand(0).getReg()) ==
+ MRI.getType(I.getOperand(1).getReg()))
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ return false;
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
@@ -1214,9 +1252,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return true;
}
case TargetOpcode::G_ICMP: {
- if (Ty != LLT::scalar(1)) {
+ if (Ty != LLT::scalar(32)) {
DEBUG(dbgs() << "G_ICMP result has type: " << Ty
- << ", expected: " << LLT::scalar(1) << '\n');
+ << ", expected: " << LLT::scalar(32) << '\n');
return false;
}
@@ -1261,9 +1299,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
}
case TargetOpcode::G_FCMP: {
- if (Ty != LLT::scalar(1)) {
+ if (Ty != LLT::scalar(32)) {
DEBUG(dbgs() << "G_FCMP result has type: " << Ty
- << ", expected: " << LLT::scalar(1) << '\n');
+ << ", expected: " << LLT::scalar(32) << '\n');
return false;
}
@@ -1336,7 +1374,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
-InstructionSelector::ComplexRendererFn
+InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
MachineInstr &MI = *Root.getParent();
MachineBasicBlock &MBB = *MI.getParent();
@@ -1356,13 +1394,13 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
else if (Root.isReg()) {
MachineInstr *Def = MRI.getVRegDef(Root.getReg());
if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
- return nullptr;
+ return None;
MachineOperand &Op1 = Def->getOperand(1);
if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
- return nullptr;
+ return None;
Immed = Op1.getCImm()->getZExtValue();
} else
- return nullptr;
+ return None;
unsigned ShiftAmt;
@@ -1372,10 +1410,116 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
ShiftAmt = 12;
Immed = Immed >> 12;
} else
- return nullptr;
+ return None;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); };
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
+ }};
+}
+
+/// Select a "register plus unscaled signed 9-bit immediate" address. This
+/// should only match when there is an offset that is not valid for a scaled
+/// immediate addressing mode. The "Size" argument is the size in bytes of the
+/// memory reference, which is needed here to know what is valid for a scaled
+/// immediate.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
+ unsigned Size) const {
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ if (!Root.isReg())
+ return None;
+
+ if (!isBaseWithConstantOffset(Root, MRI))
+ return None;
+
+ MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (!RootDef)
+ return None;
+
+ MachineOperand &OffImm = RootDef->getOperand(2);
+ if (!OffImm.isReg())
+ return None;
+ MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
+ if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
+ return None;
+ int64_t RHSC;
+ MachineOperand &RHSOp1 = RHS->getOperand(1);
+ if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
+ return None;
+ RHSC = RHSOp1.getCImm()->getSExtValue();
+
+ // If the offset is valid as a scaled immediate, don't match here.
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
+ return None;
+ if (RHSC >= -256 && RHSC < 256) {
+ MachineOperand &Base = RootDef->getOperand(1);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
+ }};
+ }
+ return None;
+}
+
+/// Select a "register plus scaled unsigned 12-bit immediate" address. The
+/// "Size" argument is the size in bytes of the memory reference, which
+/// determines the scale.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
+ unsigned Size) const {
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ if (!Root.isReg())
+ return None;
+
+ MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (!RootDef)
+ return None;
+
+ if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ }};
+ }
+
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ MachineOperand &LHS = RootDef->getOperand(1);
+ MachineOperand &RHS = RootDef->getOperand(2);
+ MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+ if (LHSDef && RHSDef) {
+ int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+ if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
+ }};
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
+ }};
+ }
+ }
+ }
+
+ // Before falling back to our general case, check if the unscaled
+ // instructions can handle this. If so, that's preferable.
+ if (selectAddrModeUnscaled(Root, Size).hasValue())
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ }};
}
namespace llvm {
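selectArithImmed() above now returns a list of operand renderers instead of a single lambda, but the value test is unchanged: accept only immediates representable as a 12-bit value shifted left by 0 or 12, as its doc comment states. A self-contained sketch of that test (plain C++, no LLVM types):

#include <cstdint>
#include <iostream>
#include <optional>

// A matched arithmetic immediate: the 12-bit payload plus the LSL amount
// (0 or 12) that the instruction encoding applies to it.
struct ArithImmed {
  uint64_t Imm;
  unsigned ShiftAmt;
};

// Accept values encodable as imm12 or imm12 << 12; reject everything else,
// which would need a separate MOV and the register form instead.
std::optional<ArithImmed> matchArithImmed(uint64_t Immed) {
  if (Immed <= 0xfffULL)
    return ArithImmed{Immed, 0};
  if ((Immed & 0xfffULL) == 0 && (Immed >> 12) <= 0xfffULL)
    return ArithImmed{Immed >> 12, 12};
  return std::nullopt;
}

int main() {
  std::cout << matchArithImmed(0x123).has_value()   // fits directly
            << matchArithImmed(0x45000).has_value() // 0x45 << 12
            << matchArithImmed(0x1001).has_value()  // neither form
            << '\n';                                // prints "110"
}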
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index ffb27834c31c..05df51202229 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,21 +13,122 @@
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
-#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-#error "You shouldn't build this"
-#endif
+/// FIXME: The following static functions are SizeChangeStrategy functions
+/// that are meant to temporarily mimic the behaviour of the old legalization
+/// based on doubling/halving non-legal types as closely as possible. This is
+/// not entirely possible, as only legalizing the types that are exactly a power
+/// of 2 times the size of the legal types would require specifying all those
+/// sizes explicitly.
+/// In practice, not specifying those isn't a problem, and the below functions
+/// should disappear quickly as we add support for legalizing non-power-of-2
+/// sized types further.
+static void
+addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result,
+ const LegalizerInfo::SizeAndActionsVec &v) {
+ for (unsigned i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 < v[i].first && i + 1 < v.size() &&
+ v[i + 1].first != v[i].first + 1)
+ result.push_back({v[i].first + 1, LegalizerInfo::Unsupported});
+ }
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_narrow_128_ToLargest(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 2);
+ LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::WidenScalar},
+ {2, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ assert(Largest + 1 < 128);
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ result.push_back({128, LegalizerInfo::NarrowScalar});
+ result.push_back({129, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {{1, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar},
+ {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 9);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
-AArch64LegalizerInfo::AArch64LegalizerInfo() {
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16_narrowToLargest(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 17);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::NarrowScalar});
+ return result;
+}
+
+static LegalizerInfo::SizeAndActionsVec
+widen_1_8_16_32(const LegalizerInfo::SizeAndActionsVec &v) {
+ assert(v.size() >= 1);
+ assert(v[0].first > 33);
+ LegalizerInfo::SizeAndActionsVec result = {
+ {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported},
+ {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported},
+ {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported},
+ {32, LegalizerInfo::WidenScalar}, {33, LegalizerInfo::Unsupported}};
+ addAndInterleaveWithUnsupported(result, v);
+ auto Largest = result.back().first;
+ result.push_back({Largest + 1, LegalizerInfo::Unsupported});
+ return result;
+}
+
+AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 64);
const LLT s1 = LLT::scalar(1);
@@ -35,6 +136,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT s128 = LLT::scalar(128);
const LLT v2s32 = LLT::vector(2, 32);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
@@ -42,21 +144,29 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {p0, s1, s8, s16, s32, s64})
setAction({G_IMPLICIT_DEF, Ty}, Legal);
+ for (auto Ty : {s16, s32, s64, p0})
+ setAction({G_PHI, Ty}, Legal);
+
+ setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1_8);
+
+ for (auto Ty : { s32, s64 })
+ setAction({G_BSWAP, Ty}, Legal);
+
for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({BinOp, Ty}, WidenScalar);
+ if (BinOp != G_ADD)
+ setLegalizeScalarToDifferentSizeStrategy(BinOp, 0,
+ widen_1_8_16_narrowToLargest);
}
setAction({G_GEP, p0}, Legal);
setAction({G_GEP, 1, s64}, Legal);
- for (auto Ty : {s1, s8, s16, s32})
- setAction({G_GEP, 1, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_GEP, 1, widen_1_8_16_32);
setAction({G_PTR_MASK, p0}, Legal);
@@ -64,16 +174,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({BinOp, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1_8_16);
}
for (unsigned BinOp : {G_SREM, G_UREM})
for (auto Ty : { s1, s8, s16, s32, s64 })
setAction({BinOp, Ty}, Lower);
- for (unsigned Op : {G_SMULO, G_UMULO})
- setAction({Op, s64}, Lower);
+ for (unsigned Op : {G_SMULO, G_UMULO}) {
+ setAction({Op, 0, s64}, Lower);
+ setAction({Op, 1, s1}, Legal);
+ }
for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
for (auto Ty : { s32, s64 })
@@ -95,8 +206,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_INSERT, Ty}, Legal);
setAction({G_INSERT, 1, Ty}, Legal);
}
+ setLegalizeScalarToDifferentSizeStrategy(G_INSERT, 0,
+ widen_1_8_16_narrowToLargest);
for (auto Ty : {s1, s8, s16}) {
- setAction({G_INSERT, Ty}, WidenScalar);
setAction({G_INSERT, 1, Ty}, Legal);
// FIXME: Can't widen the sources because that violates the constraints on
// G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
@@ -112,7 +224,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
- setAction({MemOp, s1}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(MemOp, 0,
+ widen_1_narrow_128_ToLargest);
// And everything's fine in addrspace 0.
setAction({MemOp, 1, p0}, Legal);
@@ -126,21 +239,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_CONSTANT, p0}, Legal);
- for (auto Ty : {s1, s8, s16})
- setAction({TargetOpcode::G_CONSTANT, Ty}, WidenScalar);
-
- setAction({TargetOpcode::G_FCONSTANT, s16}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FCONSTANT, 0, widen_16);
- setAction({G_ICMP, s1}, Legal);
setAction({G_ICMP, 1, s32}, Legal);
setAction({G_ICMP, 1, s64}, Legal);
setAction({G_ICMP, 1, p0}, Legal);
- for (auto Ty : {s1, s8, s16}) {
- setAction({G_ICMP, 1, Ty}, WidenScalar);
- }
+ setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FCMP, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, widen_1_8_16);
- setAction({G_FCMP, s1}, Legal);
+ setAction({G_ICMP, s32}, Legal);
+ setAction({G_FCMP, s32}, Legal);
setAction({G_FCMP, 1, s32}, Legal);
setAction({G_FCMP, 1, s64}, Legal);
@@ -151,27 +262,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_ANYEXT, Ty}, Legal);
}
- for (auto Ty : { s1, s8, s16, s32 }) {
- setAction({G_ZEXT, 1, Ty}, Legal);
- setAction({G_SEXT, 1, Ty}, Legal);
- setAction({G_ANYEXT, 1, Ty}, Legal);
- }
-
- setAction({G_FPEXT, s64}, Legal);
- setAction({G_FPEXT, 1, s32}, Legal);
-
- // Truncations
- for (auto Ty : { s16, s32 })
+ // FP conversions
+ for (auto Ty : { s16, s32 }) {
setAction({G_FPTRUNC, Ty}, Legal);
+ setAction({G_FPEXT, 1, Ty}, Legal);
+ }
- for (auto Ty : { s32, s64 })
+ for (auto Ty : { s32, s64 }) {
setAction({G_FPTRUNC, 1, Ty}, Legal);
-
- for (auto Ty : { s1, s8, s16, s32 })
- setAction({G_TRUNC, Ty}, Legal);
-
- for (auto Ty : { s8, s16, s32, s64 })
- setAction({G_TRUNC, 1, Ty}, Legal);
+ setAction({G_FPEXT, Ty}, Legal);
+ }
// Conversions
for (auto Ty : { s32, s64 }) {
@@ -180,12 +280,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_SITOFP, 1, Ty}, Legal);
setAction({G_UITOFP, 1, Ty}, Legal);
}
- for (auto Ty : { s1, s8, s16 }) {
- setAction({G_FPTOSI, 0, Ty}, WidenScalar);
- setAction({G_FPTOUI, 0, Ty}, WidenScalar);
- setAction({G_SITOFP, 1, Ty}, WidenScalar);
- setAction({G_UITOFP, 1, Ty}, WidenScalar);
- }
+ setLegalizeScalarToDifferentSizeStrategy(G_FPTOSI, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_FPTOUI, 0, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_SITOFP, 1, widen_1_8_16);
+ setLegalizeScalarToDifferentSizeStrategy(G_UITOFP, 1, widen_1_8_16);
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 1, Ty}, Legal);
@@ -200,8 +298,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_BRINDIRECT, p0}, Legal);
// Select
- for (auto Ty : {s1, s8, s16})
- setAction({G_SELECT, Ty}, WidenScalar);
+ setLegalizeScalarToDifferentSizeStrategy(G_SELECT, 0, widen_1_8_16);
for (auto Ty : {s32, s64, p0})
setAction({G_SELECT, Ty}, Legal);
@@ -221,7 +318,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_INTTOPTR, 1, s64}, Legal);
// Casts for 32 and 64-bit width type are just copies.
- for (auto Ty : {s1, s8, s16, s32, s64}) {
+ // Same for the 128-bit width type, except it lives on the FPR bank.
+ for (auto Ty : {s1, s8, s16, s32, s64, s128}) {
setAction({G_BITCAST, 0, Ty}, Legal);
setAction({G_BITCAST, 1, Ty}, Legal);
}
@@ -252,6 +350,41 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s8, s16, s32, s64, p0})
setAction({G_VAARG, Ty}, Custom);
+ if (ST.hasLSE()) {
+ for (auto Ty : {s8, s16, s32, s64}) {
+ setAction({G_ATOMIC_CMPXCHG_WITH_SUCCESS, Ty}, Lower);
+ setAction({G_ATOMIC_CMPXCHG, Ty}, Legal);
+ }
+ setAction({G_ATOMIC_CMPXCHG, 1, p0}, Legal);
+
+ for (unsigned Op :
+ {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
+ G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
+ G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) {
+ for (auto Ty : {s8, s16, s32, s64}) {
+ setAction({Op, Ty}, Legal);
+ }
+ setAction({Op, 1, p0}, Legal);
+ }
+ }
+
+ // Merge/Unmerge
+ for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES})
+ for (int Sz : {8, 16, 32, 64, 128, 192, 256, 384, 512}) {
+ LLT ScalarTy = LLT::scalar(Sz);
+ setAction({Op, ScalarTy}, Legal);
+ setAction({Op, 1, ScalarTy}, Legal);
+ if (Sz < 32)
+ continue;
+ for (int EltSize = 8; EltSize <= 64; EltSize *= 2) {
+ if (EltSize >= Sz)
+ continue;
+ LLT VecTy = LLT::vector(Sz / EltSize, EltSize);
+ setAction({Op, VecTy}, Legal);
+ setAction({Op, 1, VecTy}, Legal);
+ }
+ }
+
computeTables();
}
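
The widen_1_8_16 and related strategies used above replace the old per-type WidenScalar actions. A rough standalone illustration of what such a strategy computes (plain C++, not LLVM code; the function below merely mirrors the strategy's name and is invented for this sketch):

#include <cassert>
#include <cstdio>

// Toy model of the "widen 1/8/16" legalization strategy: scalar widths of
// 1, 8 or 16 bits are widened to 32 bits (the smallest legal GPR width on
// AArch64); other widths are left to the remaining legalization rules.
static unsigned widen_1_8_16(unsigned BitWidth) {
  switch (BitWidth) {
  case 1:
  case 8:
  case 16:
    return 32;
  default:
    return BitWidth;
  }
}

int main() {
  assert(widen_1_8_16(1) == 32);
  assert(widen_1_8_16(16) == 32);
  assert(widen_1_8_16(64) == 64);
  std::printf("s16 widens to s%u\n", widen_1_8_16(16));
  return 0;
}
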
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
index 42d4ac130c5c..a745b0edbc6d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -20,11 +20,12 @@
namespace llvm {
class LLVMContext;
+class AArch64Subtarget;
/// This class provides the information for the target register banks.
class AArch64LegalizerInfo : public LegalizerInfo {
public:
- AArch64LegalizerInfo();
+ AArch64LegalizerInfo(const AArch64Subtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9a7f45bde6c9..8a29456430b9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
+//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,12 +20,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -33,7 +35,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -64,7 +65,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
namespace {
-typedef struct LdStPairFlags {
+using LdStPairFlags = struct LdStPairFlags {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
@@ -83,8 +84,7 @@ typedef struct LdStPairFlags {
void setSExtIdx(int V) { SExtIdx = V; }
int getSExtIdx() const { return SExtIdx; }
-
-} LdStPairFlags;
+};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
@@ -101,7 +101,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which registers have been modified and used.
BitVector ModifiedRegs, UsedRegs;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
+ // Find and merge base register updates before or after a ld/st instruction.
+ bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
getLdStRegOp(MI).getReg() == AArch64::WZR;
}
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ return true;
+ }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRBBui:
+ // Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ // Paired instructions.
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ // Make sure this is a reg+imm (as opposed to an address reloc).
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ return true;
+ }
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MergeMI,
@@ -758,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (SExtIdx != -1) {
// Generate the sign extension for the proper result of the ldp.
// I.e., with X1, that would be:
- // %W1<def> = KILL %W1, %X1<imp-def>
- // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ // %w1 = KILL %w1, implicit-def %x1
+ // %x1 = SBFMXri killed %x1, 0, 31
MachineOperand &DstMO = MIB->getOperand(SExtIdx);
// Right now, DstMO has the extended register, since it comes from an
// extended opcode.
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
}
(void)MIB;
- if (IsPreIdx)
+ if (IsPreIdx) {
+ ++NumPreFolded;
DEBUG(dbgs() << "Creating pre-indexed load/store.");
- else
+ } else {
+ ++NumPostFolded;
DEBUG(dbgs() << "Creating post-indexed load/store.");
+ }
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
return false;
}
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
+    MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+ MachineBasicBlock::iterator Update;
+
+ // Look forward to try to form a post-index instruction. For example,
+ // ldr x0, [x20]
+ // add x20, x20, #32
+ // merged into:
+ // ldr x0, [x20], #32
+ Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+ return true;
+ }
+
+ // Don't know how to handle unscaled pre/post-index versions below, so bail.
+ if (TII->isUnscaledLdSt(MI.getOpcode()))
+ return false;
+
+ // Look back to try to find a pre-index instruction. For example,
+ // add x0, x0, #8
+ // ldr x1, [x0]
+ // merged into:
+ // ldr x1, [x0, #8]!
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+ return true;
+ }
+
+ // The immediate in the load/store is scaled by the size of the memory
+ // operation. The immediate in the add we're looking for,
+ // however, is not, so adjust here.
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+ // Look forward to try to form a pre-index instruction. For example,
+ // ldr x1, [x0, #64]
+ // add x0, x0, #64
+ // merged into:
+ // ldr x1, [x0, #64]!
+ Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+ if (Update != E) {
+ // Merge the update into the ld/st.
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+ return true;
+ }
+
+ return false;
+}
+
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// lsr w2, w1, #16
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::LDRBBui:
- case AArch64::LDRHHui:
- case AArch64::LDRWui:
- case AArch64::LDRXui:
- // Unscaled instructions.
- case AArch64::LDURBBi:
- case AArch64::LDURHHi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- if (tryToPromoteLoadFromStore(MBBI)) {
- Modified = true;
- break;
- }
+ if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- }
}
// 2) Merge adjacent zero stores into a wider store.
// e.g.,
@@ -1608,17 +1718,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// str wzr, [x0, #4]
// ; becomes
// str xzr, [x0]
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- EnableNarrowZeroStOpt && MBBI != E;) {
- if (isPromotableZeroStoreInst(*MBBI)) {
- if (tryToMergeZeroStInst(MBBI)) {
+ if (EnableNarrowZeroStOpt)
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
Modified = true;
- } else
+ else
++MBBI;
- } else
- ++MBBI;
- }
-
+ }
// 3) Find loads and stores that can be merged into a single load or store
// pair instruction.
// e.g.,
@@ -1642,124 +1749,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x0, [x2], #4
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- // Do update merging. It's simpler to keep this separate from the above
- // switchs, though not strictly necessary.
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default:
- // Just move on to the next instruction.
- ++MBBI;
- break;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::STRHHui:
- case AArch64::STRBBui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRHHui:
- case AArch64::LDRBBui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- // Paired instructions.
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::STPSi:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPWi:
- case AArch64::STPXi: {
- // Make sure this is a reg+imm (as opposed to an address reloc).
- if (!getLdStOffsetOp(MI).isImm()) {
- ++MBBI;
- break;
- }
- // Look forward to try to form a post-index instruction. For example,
- // ldr x0, [x20]
- // add x20, x20, #32
- // merged into:
- // ldr x0, [x20], #32
- MachineBasicBlock::iterator Update =
- findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
- Modified = true;
- ++NumPostFolded;
- break;
- }
-
- // Don't know how to handle unscaled pre/post-index versions below, so
- // move to the next instruction.
- if (TII->isUnscaledLdSt(Opc)) {
- ++MBBI;
- break;
- }
-
- // Look back to try to find a pre-index instruction. For example,
- // add x0, x0, #8
- // ldr x1, [x0]
- // merged into:
- // ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- Modified = true;
- ++NumPreFolded;
- break;
- }
- // The immediate in the load/store is scaled by the size of the memory
- // operation. The immediate in the add we're looking for,
- // however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
- // Look forward to try to find a post-index instruction. For example,
- // ldr x1, [x0, #64]
- // add x0, x0, #64
- // merged into:
- // ldr x1, [x0, #64]!
- Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
- if (Update != E) {
- // Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- Modified = true;
- ++NumPreFolded;
- break;
- }
-
- // Nothing found. Just move to the next instruction.
+ if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- }
- }
}
return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(*Fn.getFunction()))
+ if (skipFunction(Fn.getFunction()))
return false;
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
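
The third case in tryToMergeLdStUpdate above hinges on matching a scaled load/store immediate against an unscaled add immediate. A minimal standalone sketch of that check (the struct and helper below are invented for illustration and are not the pass's API):

#include <cassert>

// Toy model: decide whether "ldr xN, [xB, #Imm]" followed by
// "add xB, xB, #AddImm" can be folded into a pre-indexed load.
struct LdSt {
  int Imm;      // scaled immediate, in units of the access size
  int MemScale; // access size in bytes (e.g. 8 for LDRXui)
};

static bool matchesForwardUpdate(const LdSt &MemOp, int AddImm) {
  // The load/store immediate is scaled; the add immediate is not.
  int UnscaledOffset = MemOp.Imm * MemOp.MemScale;
  return UnscaledOffset == AddImm;
}

int main() {
  // ldr x1, [x0, #8]   (Imm = 1, scale = 8)
  // add x0, x0, #64    -> offsets differ, no fold
  assert(!matchesForwardUpdate({/*Imm=*/1, /*MemScale=*/8}, 64));
  // ldr x1, [x0, #64]  (Imm = 8, scale = 8)
  // add x0, x0, #64    -> folds into ldr x1, [x0, #64]!
  assert(matchesForwardUpdate({/*Imm=*/8, /*MemScale=*/8}, 64));
  return 0;
}
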
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index f82b9dbc2c9f..65dae03a24db 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -18,7 +18,9 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
@@ -33,7 +35,25 @@ AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
MCSymbol *
AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
- return Printer.getSymbol(MO.getGlobal());
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TargetFlags = MO.getTargetFlags();
+ const Triple &TheTriple = Printer.TM.getTargetTriple();
+ if (!TheTriple.isOSBinFormatCOFF())
+ return Printer.getSymbol(GV);
+
+ assert(TheTriple.isOSWindows() &&
+ "Windows is the only supported COFF target");
+
+ bool IsIndirect = (TargetFlags & AArch64II::MO_DLLIMPORT);
+ if (!IsIndirect)
+ return Printer.getSymbol(GV);
+
+ SmallString<128> Name;
+ Name = "__imp_";
+ Printer.TM.getNameWithPrefix(Name, GV,
+ Printer.getObjFileLowering().getMangler());
+
+ return Ctx.getOrCreateSymbol(Name);
}
MCSymbol *
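
The GetGlobalAddressSymbol change above routes dllimport'ed globals on COFF through their import-table entry. A small self-contained sketch of the naming convention it relies on (mangling is reduced to the identity here, and the helper name is invented for the example):

#include <cassert>
#include <string>

// Toy model of the dllimport lowering: a dllimport'ed global is referenced
// through its import-table entry, whose symbol is the mangled name with a
// "__imp_" prefix; other globals keep their plain symbol.
static std::string importSymbolName(const std::string &MangledName,
                                    bool IsDLLImport) {
  return IsDLLImport ? "__imp_" + MangledName : MangledName;
}

int main() {
  assert(importSymbolName("foo", /*IsDLLImport=*/false) == "foo");
  assert(importSymbolName("foo", /*IsDLLImport=*/true) == "__imp_foo");
  return 0;
}
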
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index f0bffe544158..9f354c009461 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -23,6 +23,8 @@
namespace llvm {
+class MachineInstr;
+
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
@@ -145,7 +147,7 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
- typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
+ using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -157,7 +159,7 @@ public:
SmallVector<const MachineInstr *, 3> Args;
public:
- typedef ArrayRef<const MachineInstr *> LOHArgs;
+ using LOHArgs = ArrayRef<const MachineInstr *>;
MILOHDirective(MCLOHType Kind, LOHArgs Args)
: Kind(Kind), Args(Args.begin(), Args.end()) {
@@ -168,8 +170,8 @@ public:
LOHArgs getArgs() const { return Args; }
};
- typedef MILOHDirective::LOHArgs MILOHArgs;
- typedef SmallVector<MILOHDirective, 32> MILOHContainer;
+ using MILOHArgs = MILOHDirective::LOHArgs;
+ using MILOHContainer = SmallVector<MILOHDirective, 32>;
const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 963cfadc54fd..6930c816b5ae 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -12,10 +12,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
@@ -33,8 +32,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
// Assume wildcards for unspecified instrs.
unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
unsigned SecondOpcode = SecondMI.getOpcode();
if (ST.hasArithmeticBccFusion())
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index fe4ef4b40ece..ee6703aed1e2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -20,7 +20,7 @@
#include "AArch64PBQPRegAlloc.h"
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -247,13 +247,13 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
// Do some Chain management
if (Chains.count(Ra)) {
if (Rd != Ra) {
- DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
- << PrintReg(Rd, TRI) << '\n';);
+ DEBUG(dbgs() << "Moving acc chain from " << printReg(Ra, TRI) << " to "
+ << printReg(Rd, TRI) << '\n';);
Chains.remove(Ra);
Chains.insert(Rd);
}
} else {
- DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
+ DEBUG(dbgs() << "Creating new acc chain for " << printReg(Rd, TRI)
<< '\n';);
Chains.insert(Rd);
}
@@ -340,7 +340,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
for (auto r : Chains) {
SmallVector<unsigned, 8> toDel;
if(regJustKilledBefore(LIs, r, MI)) {
- DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+ DEBUG(dbgs() << "Killing chain " << printReg(r, TRI) << " at ";
MI.print(dbgs()););
toDel.push_back(r);
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 8693f76d7c32..a8dc6e74ef6a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -1,4 +1,4 @@
-//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==//
+//==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==//
//
// The LLVM Compiler Infrastructure
//
@@ -22,23 +22,31 @@
#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <utility>
using namespace llvm;
@@ -56,6 +64,7 @@ STATISTIC(NumPromotedUses, "Number of promoted constants uses");
//===----------------------------------------------------------------------===//
namespace {
+
/// Promotes interesting constant into global variables.
/// The motivating example is:
/// static const uint16_t TableA[32] = {
@@ -83,13 +92,12 @@ namespace {
/// Therefore the final assembly has 4 different loads. With this pass
/// enabled, only one load is issued for the constants.
class AArch64PromoteConstant : public ModulePass {
-
public:
struct PromotedConstant {
bool ShouldConvert = false;
GlobalVariable *GV = nullptr;
};
- typedef SmallDenseMap<Constant *, PromotedConstant, 16> PromotionCacheTy;
+ using PromotionCacheTy = SmallDenseMap<Constant *, PromotedConstant, 16>;
struct UpdateRecord {
Constant *C;
@@ -101,6 +109,7 @@ public:
};
static char ID;
+
AArch64PromoteConstant() : ModulePass(ID) {
initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry());
}
@@ -135,9 +144,9 @@ private:
}
/// Type to store a list of Uses.
- typedef SmallVector<std::pair<Instruction *, unsigned>, 4> Uses;
+ using Uses = SmallVector<std::pair<Instruction *, unsigned>, 4>;
/// Map an insertion point to all the uses it dominates.
- typedef DenseMap<Instruction *, Uses> InsertionPoints;
+ using InsertionPoints = DenseMap<Instruction *, Uses>;
/// Find the closest point that dominates the given Use.
Instruction *findInsertionPoint(Instruction &User, unsigned OpNo);
@@ -212,6 +221,7 @@ private:
InsertPts.erase(OldInstr);
}
};
+
} // end anonymous namespace
char AArch64PromoteConstant::ID = 0;
@@ -357,7 +367,6 @@ Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User,
bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User,
unsigned OpNo,
InsertionPoints &InsertPts) {
-
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 22c11c7276d2..e5822b114324 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -5,27 +5,51 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-// This pass removes unnecessary zero copies in BBs that are targets of
-// cbz/cbnz instructions. For instance, the copy instruction in the code below
-// can be removed because the CBZW jumps to BB#2 when W0 is zero.
-// BB#1:
-// CBZW %W0, <BB#2>
-// BB#2:
-// %W0 = COPY %WZR
-// Similarly, this pass also handles non-zero copies.
-// BB#0:
-// cmp x0, #1
+// This pass removes unnecessary copies/moves in BBs based on a dominating
+// condition.
+//
+// We handle three cases:
+// 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of
+// the CBZ/CBNZ source register is zero on the taken/not-taken path. For
+// instance, the copy instruction in the code below can be removed because
+// the CBZW jumps to %bb.2 when w0 is zero.
+//
+// %bb.1:
+// cbz w0, .LBB0_2
+// .LBB0_2:
+// mov w0, wzr ; <-- redundant
+//
+// 2. If the flag setting instruction defines a register other than WZR/XZR, we
+// can remove a zero copy in some cases.
+//
+// %bb.0:
+// subs w0, w1, w2
+// str w0, [x1]
+// b.ne .LBB0_2
+// %bb.1:
+// mov w0, wzr ; <-- redundant
+// str w0, [x2]
+// .LBB0_2
+//
+// 3. Finally, if the flag setting instruction is a comparison against a
+// constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate
+// in some cases.
+//
+// %bb.0:
+// subs xzr, x0, #1
// b.eq .LBB0_1
// .LBB0_1:
-// orr x0, xzr, #0x1
+// orr x0, xzr, #0x1 ; <-- redundant
//
// This pass should be run after register allocation.
//
// FIXME: This could also be extended to check the whole dominance subtree below
// the comparison if the compile time regression is acceptable.
//
+// FIXME: Add support for handling CCMP instructions.
+// FIXME: If the known register value is zero, we should be able to rewrite uses
+// to use WZR/XZR directly in some cases.
//===----------------------------------------------------------------------===//
-
#include "AArch64.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
@@ -45,7 +69,13 @@ namespace {
class AArch64RedundantCopyElimination : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
- BitVector ClobberedRegs;
+
+ // DomBBClobberedRegs is used when computing known values in the dominating
+ // BB.
+ BitVector DomBBClobberedRegs;
+
+ // OptBBClobberedRegs is used when optimizing away redundant copies/moves.
+ BitVector OptBBClobberedRegs;
public:
static char ID;
@@ -60,10 +90,10 @@ public:
RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
};
- Optional<RegImm> knownRegValInBlock(MachineInstr &CondBr,
- MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &FirstUse);
- bool optimizeCopy(MachineBasicBlock *MBB);
+ bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB,
+ SmallVectorImpl<RegImm> &KnownRegs,
+ MachineBasicBlock::iterator &FirstUse);
+ bool optimizeBlock(MachineBasicBlock *MBB);
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
@@ -103,13 +133,19 @@ static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs,
/// It's possible to determine the value of a register based on a dominating
/// condition. To do so, this function checks to see if the basic block \p MBB
-/// is the target to which a conditional branch \p CondBr jumps and whose
-/// equality comparison is against a constant. If so, return a known physical
-/// register and constant value pair. Otherwise, return None.
-Optional<AArch64RedundantCopyElimination::RegImm>
-AArch64RedundantCopyElimination::knownRegValInBlock(
+/// is the target of a conditional branch \p CondBr with an equality comparison.
+/// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero
+/// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting
+/// instruction (e.g., SUBS, ADDS). If this instruction defines a register
+/// other than WZR/XZR, we know the value of the destination register is zero in
+/// \p MBB for some cases. In addition, if the NZCV setting instruction is
+/// comparing against a constant we know the other source register is equal to
+/// the constant in \p MBB for some cases. If we find any constant values, push
+/// a physical register and constant value pair onto the KnownRegs vector and
+/// return true. Otherwise, return false if no known values were found.
+bool AArch64RedundantCopyElimination::knownRegValInBlock(
MachineInstr &CondBr, MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &FirstUse) {
+ SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) {
unsigned Opc = CondBr.getOpcode();
// Check if the current basic block is the target block to which the
@@ -119,41 +155,39 @@ AArch64RedundantCopyElimination::knownRegValInBlock(
((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
MBB != CondBr.getOperand(1).getMBB())) {
FirstUse = CondBr;
- return RegImm(CondBr.getOperand(0).getReg(), 0);
+ KnownRegs.push_back(RegImm(CondBr.getOperand(0).getReg(), 0));
+ return true;
}
// Otherwise, must be a conditional branch.
if (Opc != AArch64::Bcc)
- return None;
+ return false;
// Must be an equality check (i.e., == or !=).
AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
- return None;
+ return false;
MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
(CC == AArch64CC::NE && BrTarget == MBB))
- return None;
+ return false;
// Stop if we get to the beginning of PredMBB.
MachineBasicBlock *PredMBB = *MBB->pred_begin();
assert(PredMBB == CondBr.getParent() &&
"Conditional branch not in predecessor block!");
if (CondBr == PredMBB->begin())
- return None;
+ return false;
// Registers clobbered in PredMBB between CondBr instruction and current
// instruction being checked in loop.
- ClobberedRegs.reset();
+ DomBBClobberedRegs.reset();
// Find compare instruction that sets NZCV used by CondBr.
MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
- // Track clobbered registers.
- trackRegDefs(PredI, ClobberedRegs, TRI);
-
bool IsCMN = false;
switch (PredI.getOpcode()) {
default:
@@ -169,37 +203,100 @@ AArch64RedundantCopyElimination::knownRegValInBlock(
case AArch64::SUBSXri: {
// Sometimes the first operand is a FrameIndex. Bail if that happens.
if (!PredI.getOperand(1).isReg())
- return None;
+ return false;
+ MCPhysReg DstReg = PredI.getOperand(0).getReg();
MCPhysReg SrcReg = PredI.getOperand(1).getReg();
- // Must not be a symbolic immediate.
- if (!PredI.getOperand(2).isImm())
- return None;
-
- // The src register must not be modified between the cmp and conditional
- // branch. This includes a self-clobbering compare.
- if (ClobberedRegs[SrcReg])
- return None;
-
- // We've found the Cmp that sets NZCV.
- int32_t KnownImm = PredI.getOperand(2).getImm();
- int32_t Shift = PredI.getOperand(3).getImm();
- KnownImm <<= Shift;
- if (IsCMN)
- KnownImm = -KnownImm;
+ bool Res = false;
+ // If we're comparing against a non-symbolic immediate and the source
+ // register of the compare is not modified (including a self-clobbering
+ // compare) between the compare and the conditional branch, we know the value
+ // of the first source operand.
+ if (PredI.getOperand(2).isImm() && !DomBBClobberedRegs[SrcReg] &&
+ SrcReg != DstReg) {
+ // We've found the instruction that sets NZCV.
+ int32_t KnownImm = PredI.getOperand(2).getImm();
+ int32_t Shift = PredI.getOperand(3).getImm();
+ KnownImm <<= Shift;
+ if (IsCMN)
+ KnownImm = -KnownImm;
+ FirstUse = PredI;
+ KnownRegs.push_back(RegImm(SrcReg, KnownImm));
+ Res = true;
+ }
+
+ // If this instruction defines something other than WZR/XZR, we know its
+ // result is zero in some cases.
+ if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
+ return Res;
+
+ // The destination register must not be modified between the NZCV setting
+ // instruction and the conditional branch.
+ if (DomBBClobberedRegs[DstReg])
+ return Res;
+
FirstUse = PredI;
- return RegImm(SrcReg, KnownImm);
+ KnownRegs.push_back(RegImm(DstReg, 0));
+ return true;
+ }
+
+ // Look for NZCV setting instructions that define something other than
+ // WZR/XZR.
+ case AArch64::ADCSWr:
+ case AArch64::ADCSXr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDSXrx:
+ case AArch64::ADDSXrx64:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::ANDSXrs:
+ case AArch64::BICSWrr:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ case AArch64::BICSXrr:
+ case AArch64::SBCSWr:
+ case AArch64::SBCSXr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64: {
+ MCPhysReg DstReg = PredI.getOperand(0).getReg();
+ if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
+ return false;
+
+ // The destination register of the NZCV setting instruction must not be
+ // modified before the conditional branch.
+ if (DomBBClobberedRegs[DstReg])
+ return false;
+
+ // We've found the instruction that sets NZCV whose DstReg == 0.
+ FirstUse = PredI;
+ KnownRegs.push_back(RegImm(DstReg, 0));
+ return true;
}
}
// Bail if we see an instruction that defines NZCV that we don't handle.
if (PredI.definesRegister(AArch64::NZCV))
- return None;
+ return false;
+
+ // Track clobbered registers.
+ trackRegDefs(PredI, DomBBClobberedRegs, TRI);
}
- return None;
+ return false;
}
-bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
+bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
// Check if the current basic block has a single predecessor.
if (MBB->pred_size() != 1)
return false;
@@ -230,14 +327,11 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
do {
--Itr;
- Optional<RegImm> KnownRegImm = knownRegValInBlock(*Itr, MBB, FirstUse);
- if (KnownRegImm == None)
+ if (!knownRegValInBlock(*Itr, MBB, KnownRegs, FirstUse))
continue;
- KnownRegs.push_back(*KnownRegImm);
-
- // Reset the clobber list, which is used by knownRegValInBlock.
- ClobberedRegs.reset();
+ // Reset the clobber list.
+ OptBBClobberedRegs.reset();
// Look backward in PredMBB for COPYs from the known reg to find other
// registers that are known to be a constant value.
@@ -249,11 +343,11 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
for (auto &KnownReg : KnownRegs) {
- if (ClobberedRegs[KnownReg.Reg])
+ if (OptBBClobberedRegs[KnownReg.Reg])
continue;
// If we have X = COPY Y, and Y is known to be zero, then now X is
// known to be zero.
- if (CopySrcReg == KnownReg.Reg && !ClobberedRegs[CopyDstReg]) {
+ if (CopySrcReg == KnownReg.Reg && !OptBBClobberedRegs[CopyDstReg]) {
KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
if (SeenFirstUse)
FirstUse = PredI;
@@ -261,7 +355,7 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
}
// If we have X = COPY Y, and X is known to be zero, then now Y is
// known to be zero.
- if (CopyDstReg == KnownReg.Reg && !ClobberedRegs[CopySrcReg]) {
+ if (CopyDstReg == KnownReg.Reg && !OptBBClobberedRegs[CopySrcReg]) {
KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
if (SeenFirstUse)
FirstUse = PredI;
@@ -274,10 +368,10 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (PredI == PredMBB->begin())
break;
- trackRegDefs(*PredI, ClobberedRegs, TRI);
+ trackRegDefs(*PredI, OptBBClobberedRegs, TRI);
// Stop if all of the known-zero regs have been clobbered.
if (all_of(KnownRegs, [&](RegImm KnownReg) {
- return ClobberedRegs[KnownReg.Reg];
+ return OptBBClobberedRegs[KnownReg.Reg];
}))
break;
}
@@ -293,7 +387,7 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
// UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
SmallSetVector<unsigned, 4> UsedKnownRegs;
MachineBasicBlock::iterator LastChange = MBB->begin();
- // Remove redundant Copy instructions unless KnownReg is modified.
+ // Remove redundant copy/move instructions unless KnownReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
++I;
@@ -391,18 +485,19 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
bool AArch64RedundantCopyElimination::runOnMachineFunction(
MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- // Resize the clobber register bitfield tracker. We do this once per
- // function and then clear the bitfield each time we optimize a copy.
- ClobberedRegs.resize(TRI->getNumRegs());
+ // Resize the clobber register bitfield trackers. We do this once per
+ // function.
+ DomBBClobberedRegs.resize(TRI->getNumRegs());
+ OptBBClobberedRegs.resize(TRI->getNumRegs());
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
- Changed |= optimizeCopy(&MBB);
+ Changed |= optimizeBlock(&MBB);
return Changed;
}
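
The pass above seeds a set of (register, constant) pairs from the dominating condition and then walks COPYs to grow that set until a clobber is seen. A simplified, forward-only model of that propagation (standalone C++ with invented names; the real pass also walks backward in the predecessor block and handles both copy directions):

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Copy { std::string Dst, Src; };

// Extend the set of registers known to hold a constant across a list of
// COPYs, skipping any copy that involves a clobbered register.
static std::map<std::string, int>
propagate(std::map<std::string, int> Known, const std::vector<Copy> &Copies,
          const std::set<std::string> &Clobbered) {
  for (const Copy &C : Copies) {
    if (Clobbered.count(C.Src) || Clobbered.count(C.Dst))
      continue;
    auto It = Known.find(C.Src);
    if (It != Known.end())
      Known[C.Dst] = It->second; // Dst = COPY Src, and Src is known.
  }
  return Known;
}

int main() {
  // cbz w0, <this block>  =>  w0 is known to be 0 on this path.
  std::map<std::string, int> Known = {{"w0", 0}};
  auto Out = propagate(Known, {{"w1", "w0"}, {"w2", "w1"}}, /*Clobbered=*/{});
  assert(Out.at("w1") == 0 && Out.at("w2") == 0); // both copies are redundant
  return 0;
}
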
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 69124dbd0f83..c497669f937f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -22,10 +22,10 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
@@ -37,10 +37,6 @@
using namespace llvm;
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-#error "You shouldn't build this"
-#endif
-
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
: AArch64GenRegisterBankInfo() {
static bool AlreadyInit = false;
@@ -63,10 +59,9 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
assert(&AArch64::FPRRegBank == &RBFPR &&
"The order in RegBanks is messed up");
- const RegisterBank &RBCCR = getRegBank(AArch64::CCRRegBankID);
+ const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
(void)RBCCR;
- assert(&AArch64::CCRRegBank == &RBCCR &&
- "The order in RegBanks is messed up");
+ assert(&AArch64::CCRegBank == &RBCCR && "The order in RegBanks is messed up");
// The GPR register bank is fully defined by all the registers in
// GR64all + its subclasses.
@@ -92,9 +87,9 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
{PMI_GPR32, PMI_GPR64}) &&
"PartialMappingIdx's are incorrectly ordered");
- assert(checkPartialMappingIdx(
- PMI_FirstFPR, PMI_LastFPR,
- {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) &&
+ assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
+ PMI_FPR256, PMI_FPR512}) &&
"PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
@@ -107,6 +102,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
+ CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
@@ -126,6 +122,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP(GPR, 32);
CHECK_VALUEMAP(GPR, 64);
+ CHECK_VALUEMAP(FPR, 16);
CHECK_VALUEMAP(FPR, 32);
CHECK_VALUEMAP(FPR, 64);
CHECK_VALUEMAP(FPR, 128);
@@ -178,6 +175,30 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
+#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
+ do { \
+ unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
+ unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
+ (void)PartialMapDstIdx; \
+ (void)PartialMapSrcIdx; \
+ const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
+ (void)Map; \
+ assert(Map[0].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
+ Map[0].NumBreakDowns == 1 && "FPR" #DstSize \
+ " Dst is incorrectly initialized"); \
+ assert(Map[1].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
+ Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \
+ " Src is incorrectly initialized"); \
+ \
+ } while (false)
+
+ CHECK_VALUEMAP_FPEXT(32, 16);
+ CHECK_VALUEMAP_FPEXT(64, 16);
+ CHECK_VALUEMAP_FPEXT(64, 32);
+ CHECK_VALUEMAP_FPEXT(128, 64);
+
assert(verify(TRI) && "Invalid register bank information");
}
@@ -233,7 +254,7 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
case AArch64::XSeqPairsClassRegClassID:
return getRegBank(AArch64::GPRRegBankID);
case AArch64::CCRRegClassID:
- return getRegBank(AArch64::CCRRegBankID);
+ return getRegBank(AArch64::CCRegBankID);
default:
llvm_unreachable("Register class not supported");
}
@@ -419,18 +440,22 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
- const MachineFunction &MF = *MI.getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
// Try the default logic for non-generic instructions that are either copies
// or already have some operands assigned to banks.
- if (!isPreISelGenericOpcode(Opc)) {
+ if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
+ Opc == TargetOpcode::G_PHI) {
const RegisterBankInfo::InstructionMapping &Mapping =
getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
}
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
switch (Opc) {
// G_{F|S|U}REM are not listed because they are not legal.
// Arithmetic ops.
@@ -454,12 +479,47 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
return getSameKindOfOperandsMapping(MI);
+ case TargetOpcode::G_FPEXT: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ return getInstructionMapping(
+ DefaultMappingID, /*Cost*/ 1,
+ getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
+ /*NumOperands*/ 2);
+ }
+ case TargetOpcode::COPY: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ // Check if one of the registers is not a generic register.
+ if ((TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !MRI.getType(DstReg).isValid()) ||
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ !MRI.getType(SrcReg).isValid())) {
+ const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
+ const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
+ if (!DstRB)
+ DstRB = SrcRB;
+ else if (!SrcRB)
+ SrcRB = DstRB;
+ // If both RB are null that means both registers are generic.
+ // We shouldn't be here.
+ assert(DstRB && SrcRB && "Both RegBank were nullptr");
+ unsigned Size = getSizeInBits(DstReg, MRI, TRI);
+ return getInstructionMapping(
+ DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
+ getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
+ // We only care about the mapping of the destination.
+ /*NumOperands*/ 1);
+ }
+ // Both registers are generic, use G_BITCAST.
+ LLVM_FALLTHROUGH;
+ }
case TargetOpcode::G_BITCAST: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
unsigned Size = DstTy.getSizeInBits();
- bool DstIsGPR = !DstTy.isVector();
- bool SrcIsGPR = !SrcTy.isVector();
+ bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
+ bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
const RegisterBank &DstRB =
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
const RegisterBank &SrcRB =
@@ -467,7 +527,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(
DefaultMappingID, copyCost(DstRB, SrcRB, Size),
getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
- /*NumOperands*/ 2);
+ // We only care about the mapping of the destination for COPY.
+ /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
}
default:
break;
@@ -488,7 +549,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
// For floating-point instructions, scalars go in FPRs.
- if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
+ if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
+ Ty.getSizeInBits() > 64)
OpRegBankIdx[Idx] = PMI_FirstFPR;
else
OpRegBankIdx[Idx] = PMI_FirstGPR;
@@ -532,15 +594,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// In that case, we want the default mapping to be on FPR
// instead of blind map every scalar to GPR.
for (const MachineInstr &UseMI :
- MRI.use_instructions(MI.getOperand(0).getReg()))
+ MRI.use_instructions(MI.getOperand(0).getReg())) {
// If we have at least one direct use in a FP instruction,
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
- if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode())) {
+ unsigned UseOpc = UseMI.getOpcode();
+ if (isPreISelGenericFloatingPointOpcode(UseOpc) ||
+ // Check if we feed a copy-like instruction with
+ // floating point constraints. In that case, we are still
+ // feeding fp instructions, but indirectly
+ // (e.g., through ABI copies).
+ ((UseOpc == TargetOpcode::COPY || UseMI.isPHI()) &&
+ getRegBank(UseMI.getOperand(0).getReg(), MRI, TRI) ==
+ &AArch64::FPRRegBank)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
+ }
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
@@ -549,7 +620,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
- if (isPreISelGenericFloatingPointOpcode(DefMI->getOpcode()))
+ unsigned DefOpc = DefMI->getOpcode();
+ if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
+ // Check if we come from a copy-like instruction with
+ // floating point constraints. In that case, we are still
+ // fed by fp instructions, but indirectly
+ // (e.g., through ABI copies).
+ ((DefOpc == TargetOpcode::COPY || DefMI->isPHI()) &&
+ getRegBank(DefMI->getOperand(0).getReg(), MRI, TRI) ==
+ &AArch64::FPRRegBank))
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
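
The default-mapping change above sends vectors, floating-point opcodes, and scalars wider than 64 bits to the FPR bank. A tiny standalone model of that decision (the enum and function are invented for this sketch):

#include <cassert>

enum class RegBank { GPR, FPR };

// Default bank choice: vector types, floating-point operations, and scalars
// wider than 64 bits go to FPR; everything else defaults to GPR.
static RegBank chooseBank(bool IsVector, bool IsFPOpcode, unsigned SizeInBits) {
  if (IsVector || IsFPOpcode || SizeInBits > 64)
    return RegBank::FPR;
  return RegBank::GPR;
}

int main() {
  assert(chooseBank(false, false, 32) == RegBank::GPR);  // scalar integer op
  assert(chooseBank(false, true, 64) == RegBank::FPR);   // e.g. G_FADD
  assert(chooseBank(false, false, 128) == RegBank::FPR); // s128 scalar
  assert(chooseBank(true, false, 64) == RegBank::FPR);   // e.g. v2s32
  return 0;
}
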
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 6d74a47095a9..008221dbef58 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -25,10 +25,10 @@ class TargetRegisterInfo;
class AArch64GenRegisterBankInfo : public RegisterBankInfo {
protected:
-
enum PartialMappingIdx {
PMI_None = -1,
- PMI_FPR32 = 1,
+ PMI_FPR16 = 1,
+ PMI_FPR32,
PMI_FPR64,
PMI_FPR128,
PMI_FPR256,
@@ -37,7 +37,7 @@ protected:
PMI_GPR64,
PMI_FirstGPR = PMI_GPR32,
PMI_LastGPR = PMI_GPR64,
- PMI_FirstFPR = PMI_FPR32,
+ PMI_FirstFPR = PMI_FPR16,
PMI_LastFPR = PMI_FPR512,
PMI_Min = PMI_FirstFPR,
};
@@ -49,11 +49,15 @@ protected:
enum ValueMappingIdx {
InvalidIdx = 0,
First3OpsIdx = 1,
- Last3OpsIdx = 19,
+ Last3OpsIdx = 22,
DistanceBetweenRegBanks = 3,
- FirstCrossRegCpyIdx = 22,
- LastCrossRegCpyIdx = 34,
- DistanceBetweenCrossRegCpy = 2
+ FirstCrossRegCpyIdx = 25,
+ LastCrossRegCpyIdx = 39,
+ DistanceBetweenCrossRegCpy = 2,
+ FPExt16To32Idx = 41,
+ FPExt16To64Idx = 43,
+ FPExt32To64Idx = 45,
+ FPExt64To128Idx = 47,
};
static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
@@ -82,6 +86,15 @@ protected:
static const RegisterBankInfo::ValueMapping *
getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
+ /// Get the instruction mapping for G_FPEXT.
+ ///
+ /// \pre (DstSize, SrcSize) pair is one of the following:
+ /// (32, 16), (64, 16), (64, 32), (128, 64)
+ ///
+ /// \return An InstructionMapping with statically allocated OperandsMapping.
+ static const RegisterBankInfo::ValueMapping *
+ getFPExtMapping(unsigned DstSize, unsigned SrcSize);
+
#define GET_TARGET_REGBANK_CLASS
#include "AArch64GenRegisterBank.inc"
};
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
index c2b6c0b04e9b..eee584708f69 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -17,4 +17,4 @@ def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
/// Conditional register: NZCV.
-def CCRRegBank : RegisterBank<"CCR", [CCR]>;
+def CCRegBank : RegisterBank<"CC", [CCR]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 9f7dcb3fe1c3..88dd297e0079 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -35,27 +35,29 @@ using namespace llvm;
#include "AArch64GenRegisterInfo.inc"
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
- : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {
+ AArch64_MC::initLLVMToCVRegMapping(this);
+}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
- if (MF->getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
- if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
+ if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
- if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
CSR_AArch64_CXX_TLS_Darwin_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
- MF->getFunction()->getAttributes().hasAttrSomewhere(
+ MF->getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftError))
return CSR_AArch64_AAPCS_SwiftError_SaveList;
- if (MF->getFunction()->getCallingConv() == CallingConv::PreserveMost)
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
else
return CSR_AArch64_AAPCS_SaveList;
@@ -64,7 +66,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
- if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
return nullptr;
@@ -82,7 +84,7 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_AArch64_CXX_TLS_Darwin_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
- MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return CSR_AArch64_AAPCS_SwiftError_RegMask;
if (CC == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_RegMask;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 7e29ee5e9baf..39e3e33b0d27 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -32,6 +32,12 @@ let Namespace = "AArch64" in {
def qsub : SubRegIndex<64>;
def sube64 : SubRegIndex<64>;
def subo64 : SubRegIndex<64>;
+ // SVE
+ def zsub : SubRegIndex<128>;
+ // Note: zsub_hi should never be used directly because it represents
+ // the scalable part of the SVE vector and cannot be manipulated as a
+ // subvector in the same way the lower 128bits can.
+ def zsub_hi : SubRegIndex<128>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
@@ -169,6 +175,15 @@ def GPR64sp0 : RegisterOperand<GPR64sp> {
let ParserMatchClass = GPR64spPlus0Operand;
}
+// GPR32/GPR64 but with zero-register substitution enabled.
+// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all.
+def GPR32z : RegisterOperand<GPR32> {
+ let GIZeroRegister = WZR;
+}
+def GPR64z : RegisterOperand<GPR64> {
+ let GIZeroRegister = XZR;
+}
+
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
// constraint used by any instructions, it is used as a common super-class.
def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
@@ -451,11 +466,11 @@ def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
// assembler matching.
def VectorReg64AsmOperand : AsmOperandClass {
let Name = "VectorReg64";
- let PredicateMethod = "isVectorReg";
+ let PredicateMethod = "isNeonVectorReg";
}
def VectorReg128AsmOperand : AsmOperandClass {
let Name = "VectorReg128";
- let PredicateMethod = "isVectorReg";
+ let PredicateMethod = "isNeonVectorReg";
}
def V64 : RegisterOperand<FPR64, "printVRegOperand"> {
@@ -466,7 +481,10 @@ def V128 : RegisterOperand<FPR128, "printVRegOperand"> {
let ParserMatchClass = VectorReg128AsmOperand;
}
-def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; }
+def VectorRegLoAsmOperand : AsmOperandClass {
+ let Name = "VectorRegLo";
+ let PredicateMethod = "isNeonVectorRegLo";
+}
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
let ParserMatchClass = VectorRegLoAsmOperand;
}
@@ -633,3 +651,170 @@ def XSeqPairClassOperand :
//===----- END: v8.1a atomic CASP register operands -----------------------===//
+
+// SVE predicate registers
+def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
+def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
+def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>;
+def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>;
+def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>;
+def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>;
+def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>;
+def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>;
+def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>;
+def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>;
+def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>;
+def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>;
+def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>;
+def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>;
+def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>;
+def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>;
+
+// The parts of the SVE registers that don't overlap the Neon registers.
+// These are only used as part of clobber lists.
+def Z0_HI : AArch64Reg<0, "z0_hi">;
+def Z1_HI : AArch64Reg<1, "z1_hi">;
+def Z2_HI : AArch64Reg<2, "z2_hi">;
+def Z3_HI : AArch64Reg<3, "z3_hi">;
+def Z4_HI : AArch64Reg<4, "z4_hi">;
+def Z5_HI : AArch64Reg<5, "z5_hi">;
+def Z6_HI : AArch64Reg<6, "z6_hi">;
+def Z7_HI : AArch64Reg<7, "z7_hi">;
+def Z8_HI : AArch64Reg<8, "z8_hi">;
+def Z9_HI : AArch64Reg<9, "z9_hi">;
+def Z10_HI : AArch64Reg<10, "z10_hi">;
+def Z11_HI : AArch64Reg<11, "z11_hi">;
+def Z12_HI : AArch64Reg<12, "z12_hi">;
+def Z13_HI : AArch64Reg<13, "z13_hi">;
+def Z14_HI : AArch64Reg<14, "z14_hi">;
+def Z15_HI : AArch64Reg<15, "z15_hi">;
+def Z16_HI : AArch64Reg<16, "z16_hi">;
+def Z17_HI : AArch64Reg<17, "z17_hi">;
+def Z18_HI : AArch64Reg<18, "z18_hi">;
+def Z19_HI : AArch64Reg<19, "z19_hi">;
+def Z20_HI : AArch64Reg<20, "z20_hi">;
+def Z21_HI : AArch64Reg<21, "z21_hi">;
+def Z22_HI : AArch64Reg<22, "z22_hi">;
+def Z23_HI : AArch64Reg<23, "z23_hi">;
+def Z24_HI : AArch64Reg<24, "z24_hi">;
+def Z25_HI : AArch64Reg<25, "z25_hi">;
+def Z26_HI : AArch64Reg<26, "z26_hi">;
+def Z27_HI : AArch64Reg<27, "z27_hi">;
+def Z28_HI : AArch64Reg<28, "z28_hi">;
+def Z29_HI : AArch64Reg<29, "z29_hi">;
+def Z30_HI : AArch64Reg<30, "z30_hi">;
+def Z31_HI : AArch64Reg<31, "z31_hi">;
+
+// SVE variable-size vector registers
+let SubRegIndices = [zsub,zsub_hi] in {
+def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>;
+def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>;
+def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>;
+def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>;
+def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>;
+def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>;
+def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>;
+def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>;
+def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>;
+def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>;
+def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>;
+def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>;
+def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>;
+def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>;
+def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>;
+def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>;
+def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>;
+def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>;
+def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>;
+def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>;
+def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>;
+def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>;
+def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>;
+def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>;
+def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>;
+def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>;
+def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>;
+def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>;
+def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>;
+def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>;
+def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
+def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
+}
+
+class SVERegOp <string Suffix, AsmOperandClass C,
+ RegisterClass RC> : RegisterOperand<RC> {
+ let PrintMethod = !if(!eq(Suffix, ""),
+ "printSVERegOp<>",
+ "printSVERegOp<'" # Suffix # "'>");
+ let ParserMatchClass = C;
+}
+
+class PPRRegOp <string Suffix, AsmOperandClass C,
+ RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+class ZPRRegOp <string Suffix, AsmOperandClass C,
+ RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
+
+//******************************************************************************
+
+// SVE predicate register class.
+def PPR : RegisterClass<"AArch64",
+ [nxv16i1, nxv8i1, nxv4i1, nxv2i1],
+ 16, (sequence "P%u", 0, 15)> {
+ let Size = 16;
+}
+
+class PPRAsmOperand <string name, int Width>: AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
+ let PredicateMethod = "isSVEVectorRegOfWidth<"
+ # Width # ", AArch64::PPRRegClassID>";
+ let DiagnosticType = "InvalidSVE" # name # "Reg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseSVEPredicateVector";
+}
+
+def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", -1>;
+def PPRAsmOp8 : PPRAsmOperand<"PredicateB", 8>;
+def PPRAsmOp16 : PPRAsmOperand<"PredicateH", 16>;
+def PPRAsmOp32 : PPRAsmOperand<"PredicateS", 32>;
+def PPRAsmOp64 : PPRAsmOperand<"PredicateD", 64>;
+
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
+
+//******************************************************************************
+
+// SVE vector register class
+def ZPR : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32,
+ nxv1f64, nxv2f64],
+ 128, (sequence "Z%u", 0, 31)> {
+ let Size = 128;
+}
+
+class ZPRAsmOperand <string name, int Width>: AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
+ let PredicateMethod = "isSVEVectorRegOfWidth<"
+ # Width # ", AArch64::ZPRRegClassID>";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseSVEDataVector<"
+ # !if(!eq(Width, -1), "false", "true") # ">";
+}
+
+def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", -1>;
+def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>;
+def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>;
+def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
+def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
+def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
+
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
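The PPRAsmOperand and ZPRAsmOperand classes above hand the generated assembly matcher a templated predicate named isSVEVectorRegOfWidth<Width, RegClassID>, where a Width of -1 stands for "any element width" (the *AsmOpAny operands); for instance, ZPRAsmOp32 expands to isSVEVectorRegOfWidth<32, AArch64::ZPRRegClassID>. The predicate itself is implemented in the AArch64 assembly parser and is not part of this hunk. The standalone C++ sketch below is only a hypothetical illustration of the shape such a width check can take; the ParsedOperand type, its fields, and the class-ID stand-ins are assumptions made for the example, not LLVM's actual parser state.

// Hypothetical, simplified model of a parsed SVE register operand.
struct ParsedOperand {
  bool IsVectorReg;    // Parsed as a vector register at all?
  unsigned RegClassID; // Stand-in for a register class ID (PPR or ZPR).
  int ElementWidth;    // Element width implied by the suffix (.b/.h/.s/.d).
};

// Width == -1 accepts any element width; otherwise the suffix must match
// exactly, and the register must belong to the requested class.
template <int Width, unsigned RegClassID>
bool isSVEVectorRegOfWidth(const ParsedOperand &Op) {
  return Op.IsVectorReg && Op.RegClassID == RegClassID &&
         (Width == -1 || Op.ElementWidth == Width);
}

enum : unsigned { PPRClassID = 0, ZPRClassID = 1 }; // stand-ins, not LLVM's IDs

int main() {
  ParsedOperand Op{/*IsVectorReg=*/true, ZPRClassID, /*ElementWidth=*/32};
  // What the matcher for a ZPR32 operand (".s" suffix) would conceptually ask:
  return isSVEVectorRegOfWidth<32, ZPRClassID>(Op) &&
                 isSVEVectorRegOfWidth<-1, ZPRClassID>(Op) &&
                 !isSVEVectorRegOfWidth<64, ZPRClassID>(Op)
             ? 0
             : 1;
}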
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
new file mode 100644
index 000000000000..e1851875abc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -0,0 +1,741 @@
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs optimization on SIMD instructions
+// with high latency by splitting them into more efficient series of
+// instructions.
+//
+// 1. Rewrite certain SIMD instructions with vector element due to their
+// inefficiency on some targets.
+//
+// For example:
+// fmla v0.4s, v1.4s, v2.s[1]
+//
+// Is rewritten into:
+// dup v3.4s, v2.s[1]
+// fmla v0.4s, v1.4s, v3.4s
+//
+// 2. Rewrite interleaved memory access instructions due to their
+// inefficiency on some targets.
+//
+// For example:
+// st2 {v0.4s, v1.4s}, addr
+//
+// Is rewritten into:
+// zip1 v2.4s, v0.4s, v1.4s
+// zip2 v3.4s, v0.4s, v1.4s
+// stp q2, q3, addr
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include <unordered_map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-simdinstr-opt"
+
+STATISTIC(NumModifiedInstr,
+ "Number of SIMD instructions modified");
+
+#define AARCH64_VECTOR_BY_ELEMENT_OPT_NAME \
+ "AArch64 SIMD instructions optimization pass"
+
+namespace {
+
+struct AArch64SIMDInstrOpt : public MachineFunctionPass {
+ static char ID;
+
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ TargetSchedModel SchedModel;
+
+ // The two maps below cache decisions instead of recomputing them:
+ // This one caches instruction replacement decisions within a function
+ // and across functions.
+ std::map<std::pair<unsigned, std::string>, bool> SIMDInstrTable;
+ // This one caches whether to exit the interleaved store instructions
+ // replacement subpass early for a particular target.
+ std::unordered_map<std::string, bool> InterlEarlyExit;
+
+ typedef enum {
+ VectorElem,
+ Interleave
+ } Subpass;
+
+ // Instruction represented by OrigOpc is replaced by instructions in ReplOpc.
+ struct InstReplInfo {
+ unsigned OrigOpc;
+ std::vector<unsigned> ReplOpc;
+ const TargetRegisterClass RC;
+ };
+
+#define RuleST2(OpcOrg, OpcR0, OpcR1, OpcR2, RC) \
+ {OpcOrg, {OpcR0, OpcR1, OpcR2}, RC}
+#define RuleST4(OpcOrg, OpcR0, OpcR1, OpcR2, OpcR3, OpcR4, OpcR5, OpcR6, \
+ OpcR7, OpcR8, OpcR9, RC) \
+ {OpcOrg, \
+ {OpcR0, OpcR1, OpcR2, OpcR3, OpcR4, OpcR5, OpcR6, OpcR7, OpcR8, OpcR9}, RC}
+
+ // The Instruction Replacement Table:
+ std::vector<InstReplInfo> IRT = {
+ // ST2 instructions
+ RuleST2(AArch64::ST2Twov2d, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64,
+ AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST2(AArch64::ST2Twov4s, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32,
+ AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST2(AArch64::ST2Twov2s, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32,
+ AArch64::STPDi, AArch64::FPR64RegClass),
+ RuleST2(AArch64::ST2Twov8h, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16,
+ AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST2(AArch64::ST2Twov4h, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16,
+ AArch64::STPDi, AArch64::FPR64RegClass),
+ RuleST2(AArch64::ST2Twov16b, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8,
+ AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST2(AArch64::ST2Twov8b, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8,
+ AArch64::STPDi, AArch64::FPR64RegClass),
+ // ST4 instructions
+ RuleST4(AArch64::ST4Fourv2d, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64,
+ AArch64::ZIP1v2i64, AArch64::ZIP2v2i64, AArch64::ZIP1v2i64,
+ AArch64::ZIP2v2i64, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64,
+ AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST4(AArch64::ST4Fourv4s, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32,
+ AArch64::ZIP1v4i32, AArch64::ZIP2v4i32, AArch64::ZIP1v4i32,
+ AArch64::ZIP2v4i32, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32,
+ AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST4(AArch64::ST4Fourv2s, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32,
+ AArch64::ZIP1v2i32, AArch64::ZIP2v2i32, AArch64::ZIP1v2i32,
+ AArch64::ZIP2v2i32, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32,
+ AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass),
+ RuleST4(AArch64::ST4Fourv8h, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16,
+ AArch64::ZIP1v8i16, AArch64::ZIP2v8i16, AArch64::ZIP1v8i16,
+ AArch64::ZIP2v8i16, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16,
+ AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST4(AArch64::ST4Fourv4h, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16,
+ AArch64::ZIP1v4i16, AArch64::ZIP2v4i16, AArch64::ZIP1v4i16,
+ AArch64::ZIP2v4i16, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16,
+ AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass),
+ RuleST4(AArch64::ST4Fourv16b, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8,
+ AArch64::ZIP1v16i8, AArch64::ZIP2v16i8, AArch64::ZIP1v16i8,
+ AArch64::ZIP2v16i8, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8,
+ AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass),
+ RuleST4(AArch64::ST4Fourv8b, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8,
+ AArch64::ZIP1v8i8, AArch64::ZIP2v8i8, AArch64::ZIP1v8i8,
+ AArch64::ZIP2v8i8, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8,
+ AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass)
+ };
+
+ // A costly instruction is replaced in this pass by N cheaper instructions.
+ // The maximum N is currently 10, reached in the ST4 case.
+ static const unsigned MaxNumRepl = 10;
+
+ AArch64SIMDInstrOpt() : MachineFunctionPass(ID) {
+ initializeAArch64SIMDInstrOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Based only on latency of instructions, determine if it is cost efficient
+ /// to replace the instruction InstDesc by the instructions stored in the
+ /// array InstDescRepl.
+ /// Return true if replacement is expected to be faster.
+ bool shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc,
+ SmallVectorImpl<const MCInstrDesc*> &ReplInstrMCID);
+
+ /// Determine if we need to exit the instruction replacement optimization
+ /// passes early. This makes sure that no compile time is spent in this pass
+ /// for targets with no need for any of these optimizations.
+ /// Return true if early exit of the pass is recommended.
+ bool shouldExitEarly(MachineFunction *MF, Subpass SP);
+
+ /// Check whether an equivalent DUP instruction has already been
+ /// created or not.
+ /// Return true when the DUP instruction already exists. In this case,
+ /// DestReg will point to the destination of the already created DUP.
+ bool reuseDUP(MachineInstr &MI, unsigned DupOpcode, unsigned SrcReg,
+ unsigned LaneNumber, unsigned *DestReg) const;
+
+ /// Certain SIMD instructions with a vector element operand are not efficient.
+ /// Rewrite them into SIMD instructions with vector operands. This rewrite
+ /// is driven by the latency of the instructions.
+ /// Return true if the SIMD instruction is modified.
+ bool optimizeVectElement(MachineInstr &MI);
+
+ /// Process the REG_SEQUENCE instruction, and extract the source
+ /// operands of the ST2/ST4 instruction from it.
+ /// Example of such an instruction:
+ /// %dest = REG_SEQUENCE %st2_src1, dsub0, %st2_src2, dsub1;
+ /// Return true when the instruction is processed successfully.
+ bool processSeqRegInst(MachineInstr *DefiningMI, unsigned* StReg,
+ unsigned* StRegKill, unsigned NumArg) const;
+
+ /// Load/Store Interleaving instructions are not always beneficial.
+ /// Replace them with ZIP instructions and classical load/store.
+ /// Return true if the SIMD instruction is modified.
+ bool optimizeLdStInterleave(MachineInstr &MI);
+
+ /// Return the number of useful source registers for this
+ /// instruction (2 for ST2 and 4 for ST4).
+ unsigned determineSrcReg(MachineInstr &MI) const;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
+ }
+};
+
+char AArch64SIMDInstrOpt::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(AArch64SIMDInstrOpt, "aarch64-simdinstr-opt",
+ AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
+
+/// Based only on latency of instructions, determine if it is cost efficient
+/// to replace the instruction InstDesc by the instructions stored in the
+/// array InstDescRepl.
+/// Return true if replacement is expected to be faster.
+bool AArch64SIMDInstrOpt::
+shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc,
+ SmallVectorImpl<const MCInstrDesc*> &InstDescRepl) {
+ // Check if replacement decision is already available in the cached table.
+ // If so, return it.
+ std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU();
+ auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget);
+ if (SIMDInstrTable.find(InstID) != SIMDInstrTable.end())
+ return SIMDInstrTable[InstID];
+
+ unsigned SCIdx = InstDesc->getSchedClass();
+ const MCSchedClassDesc *SCDesc =
+ SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
+
+ // If a target does not define resources for the instructions
+ // of interest, then return false for no replacement.
+ const MCSchedClassDesc *SCDescRepl;
+ if (!SCDesc->isValid() || SCDesc->isVariant())
+ {
+ SIMDInstrTable[InstID] = false;
+ return false;
+ }
+ for (auto IDesc : InstDescRepl)
+ {
+ SCDescRepl = SchedModel.getMCSchedModel()->getSchedClassDesc(
+ IDesc->getSchedClass());
+ if (!SCDescRepl->isValid() || SCDescRepl->isVariant())
+ {
+ SIMDInstrTable[InstID] = false;
+ return false;
+ }
+ }
+
+ // Replacement cost.
+ unsigned ReplCost = 0;
+ for (auto IDesc : InstDescRepl)
+ ReplCost += SchedModel.computeInstrLatency(IDesc->getOpcode());
+
+ if (SchedModel.computeInstrLatency(InstDesc->getOpcode()) > ReplCost)
+ {
+ SIMDInstrTable[InstID] = true;
+ return true;
+ }
+ else
+ {
+ SIMDInstrTable[InstID] = false;
+ return false;
+ }
+}
+
+/// Determine whether this pass should exit early for the given kind of
+/// instruction replacement (subpass). This makes sure that, beyond performing
+/// this check, no compile time is spent in this pass for targets that need
+/// none of these optimizations.
+/// Return true if an early exit of this subpass is recommended for the target.
+bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) {
+ const MCInstrDesc* OriginalMCID;
+ SmallVector<const MCInstrDesc*, MaxNumRepl> ReplInstrMCID;
+
+ switch (SP) {
+ // For this optimization, check by comparing the latency of a representative
+ // instruction to that of the replacement instructions.
+ // TODO: check for all concerned instructions.
+ case VectorElem:
+ OriginalMCID = &TII->get(AArch64::FMLAv4i32_indexed);
+ ReplInstrMCID.push_back(&TII->get(AArch64::DUPv4i32lane));
+ ReplInstrMCID.push_back(&TII->get(AArch64::FMLAv4f32));
+ if (shouldReplaceInst(MF, OriginalMCID, ReplInstrMCID))
+ return false;
+ break;
+
+ // For this optimization, check for all concerned instructions.
+ case Interleave:
+ std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU();
+ if (InterlEarlyExit.find(Subtarget) != InterlEarlyExit.end())
+ return InterlEarlyExit[Subtarget];
+
+ for (auto &I : IRT) {
+ OriginalMCID = &TII->get(I.OrigOpc);
+ for (auto &Repl : I.ReplOpc)
+ ReplInstrMCID.push_back(&TII->get(Repl));
+ if (shouldReplaceInst(MF, OriginalMCID, ReplInstrMCID)) {
+ InterlEarlyExit[Subtarget] = false;
+ return false;
+ }
+ ReplInstrMCID.clear();
+ }
+ InterlEarlyExit[Subtarget] = true;
+ break;
+ }
+
+ return true;
+}
+
+/// Check whether an equivalent DUP instruction has already been
+/// created or not.
+/// Return true when the DUP instruction already exists. In this case,
+/// DestReg will point to the destination of the already created DUP.
+bool AArch64SIMDInstrOpt::reuseDUP(MachineInstr &MI, unsigned DupOpcode,
+ unsigned SrcReg, unsigned LaneNumber,
+ unsigned *DestReg) const {
+ for (MachineBasicBlock::iterator MII = MI, MIE = MI.getParent()->begin();
+ MII != MIE;) {
+ MII--;
+ MachineInstr *CurrentMI = &*MII;
+
+ if (CurrentMI->getOpcode() == DupOpcode &&
+ CurrentMI->getNumOperands() == 3 &&
+ CurrentMI->getOperand(1).getReg() == SrcReg &&
+ CurrentMI->getOperand(2).getImm() == LaneNumber) {
+ *DestReg = CurrentMI->getOperand(0).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Certain SIMD instructions with a vector element operand are not efficient.
+/// Rewrite them into SIMD instructions with vector operands. This rewrite
+/// is driven by the latency of the instructions.
+/// The instructions of concern are, for the time being, FMLA, FMLS, FMUL,
+/// and FMULX, and hence they are hardcoded.
+///
+/// For example:
+/// fmla v0.4s, v1.4s, v2.s[1]
+///
+/// Is rewritten into
+/// dup v3.4s, v2.s[1] // DUP not necessary if redundant
+/// fmla v0.4s, v1.4s, v3.4s
+///
+/// Return true if the SIMD instruction is modified.
+bool AArch64SIMDInstrOpt::optimizeVectElement(MachineInstr &MI) {
+ const MCInstrDesc *MulMCID, *DupMCID;
+ const TargetRegisterClass *RC = &AArch64::FPR128RegClass;
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+
+ // 4X32 instructions
+ case AArch64::FMLAv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMLAv4f32);
+ break;
+ case AArch64::FMLSv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMLSv4f32);
+ break;
+ case AArch64::FMULXv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMULXv4f32);
+ break;
+ case AArch64::FMULv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMULv4f32);
+ break;
+
+ // 2X64 instructions
+ case AArch64::FMLAv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMLAv2f64);
+ break;
+ case AArch64::FMLSv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMLSv2f64);
+ break;
+ case AArch64::FMULXv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMULXv2f64);
+ break;
+ case AArch64::FMULv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMULv2f64);
+ break;
+
+ // 2X32 instructions
+ case AArch64::FMLAv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMLAv2f32);
+ break;
+ case AArch64::FMLSv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMLSv2f32);
+ break;
+ case AArch64::FMULXv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMULXv2f32);
+ break;
+ case AArch64::FMULv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMULv2f32);
+ break;
+ }
+
+ SmallVector<const MCInstrDesc*, 2> ReplInstrMCID;
+ ReplInstrMCID.push_back(DupMCID);
+ ReplInstrMCID.push_back(MulMCID);
+ if (!shouldReplaceInst(MI.getParent()->getParent(), &TII->get(MI.getOpcode()),
+ ReplInstrMCID))
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ // Get the operands of the current SIMD arithmetic instruction.
+ unsigned MulDest = MI.getOperand(0).getReg();
+ unsigned SrcReg0 = MI.getOperand(1).getReg();
+ unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill());
+ unsigned SrcReg1 = MI.getOperand(2).getReg();
+ unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill());
+ unsigned DupDest;
+
+ // Instructions of interest have either 4 or 5 operands.
+ if (MI.getNumOperands() == 5) {
+ unsigned SrcReg2 = MI.getOperand(3).getReg();
+ unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill());
+ unsigned LaneNumber = MI.getOperand(4).getImm();
+ // Create a new DUP instruction. Note that if an equivalent DUP instruction
+ // has already been created before, then use that one instead of creating
+ // a new one.
+ if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) {
+ DupDest = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, MI, DL, *DupMCID, DupDest)
+ .addReg(SrcReg2, Src2IsKill)
+ .addImm(LaneNumber);
+ }
+ BuildMI(MBB, MI, DL, *MulMCID, MulDest)
+ .addReg(SrcReg0, Src0IsKill)
+ .addReg(SrcReg1, Src1IsKill)
+ .addReg(DupDest, Src2IsKill);
+ } else if (MI.getNumOperands() == 4) {
+ unsigned LaneNumber = MI.getOperand(3).getImm();
+ if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) {
+ DupDest = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, MI, DL, *DupMCID, DupDest)
+ .addReg(SrcReg1, Src1IsKill)
+ .addImm(LaneNumber);
+ }
+ BuildMI(MBB, MI, DL, *MulMCID, MulDest)
+ .addReg(SrcReg0, Src0IsKill)
+ .addReg(DupDest, Src1IsKill);
+ } else {
+ return false;
+ }
+
+ ++NumModifiedInstr;
+ return true;
+}
+
+/// Load/Store Interleaving instructions are not always beneficial.
+/// Replace them by ZIP instructions and classical load/store.
+///
+/// For example:
+/// st2 {v0.4s, v1.4s}, addr
+///
+/// Is rewritten into:
+/// zip1 v2.4s, v0.4s, v1.4s
+/// zip2 v3.4s, v0.4s, v1.4s
+/// stp q2, q3, addr
+///
+/// For example:
+/// st4 {v0.4s, v1.4s, v2.4s, v3.4s}, addr
+///
+/// Is rewritten into:
+/// zip1 v4.4s, v0.4s, v2.4s
+/// zip2 v5.4s, v0.4s, v2.4s
+/// zip1 v6.4s, v1.4s, v3.4s
+/// zip2 v7.4s, v1.4s, v3.4s
+/// zip1 v8.4s, v4.4s, v6.4s
+/// zip2 v9.4s, v4.4s, v6.4s
+/// zip1 v10.4s, v5.4s, v7.4s
+/// zip2 v11.4s, v5.4s, v7.4s
+/// stp q8, q9, addr
+/// stp q10, q11, addr+32
+///
+/// Currently only instructions related to ST2 and ST4 are considered.
+/// Others may be added later.
+/// Return true if the SIMD instruction is modified.
+bool AArch64SIMDInstrOpt::optimizeLdStInterleave(MachineInstr &MI) {
+
+ unsigned SeqReg, AddrReg;
+ unsigned StReg[4], StRegKill[4];
+ MachineInstr *DefiningMI;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock &MBB = *MI.getParent();
+ SmallVector<unsigned, MaxNumRepl> ZipDest;
+ SmallVector<const MCInstrDesc*, MaxNumRepl> ReplInstrMCID;
+
+ // If current instruction matches any of the rewriting rules, then
+ // gather information about parameters of the new instructions.
+ bool Match = false;
+ for (auto &I : IRT) {
+ if (MI.getOpcode() == I.OrigOpc) {
+ SeqReg = MI.getOperand(0).getReg();
+ AddrReg = MI.getOperand(1).getReg();
+ DefiningMI = MRI->getUniqueVRegDef(SeqReg);
+ unsigned NumReg = determineSrcReg(MI);
+ if (!processSeqRegInst(DefiningMI, StReg, StRegKill, NumReg))
+ return false;
+
+ for (auto &Repl : I.ReplOpc) {
+ ReplInstrMCID.push_back(&TII->get(Repl));
+ // Generate destination registers, but only for non-store instructions.
+ if (Repl != AArch64::STPQi && Repl != AArch64::STPDi)
+ ZipDest.push_back(MRI->createVirtualRegister(&I.RC));
+ }
+ Match = true;
+ break;
+ }
+ }
+
+ if (!Match)
+ return false;
+
+ // Determine if it is profitable to replace MI by the series of instructions
+ // represented in ReplInstrMCID.
+ if (!shouldReplaceInst(MI.getParent()->getParent(), &TII->get(MI.getOpcode()),
+ ReplInstrMCID))
+ return false;
+
+ // Generate the replacement instructions, composed of ZIP1, ZIP2, and STP. At
+ // this point the code generation is hardcoded and does not rely on the IRT
+ // table used above, since code generation for the ST2 replacement is somewhat
+ // different from that for the ST4 replacement. We could have encoded how the
+ // new instructions are built in the table, but that would add more complexity
+ // than it removes.
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+
+ case AArch64::ST2Twov16b:
+ case AArch64::ST2Twov8b:
+ case AArch64::ST2Twov8h:
+ case AArch64::ST2Twov4h:
+ case AArch64::ST2Twov4s:
+ case AArch64::ST2Twov2s:
+ case AArch64::ST2Twov2d:
+ // ZIP instructions
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[0], ZipDest[0])
+ .addReg(StReg[0])
+ .addReg(StReg[1]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[1], ZipDest[1])
+ .addReg(StReg[0], StRegKill[0])
+ .addReg(StReg[1], StRegKill[1]);
+ // STP instructions
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[2])
+ .addReg(ZipDest[0])
+ .addReg(ZipDest[1])
+ .addReg(AddrReg)
+ .addImm(0);
+ break;
+
+ case AArch64::ST4Fourv16b:
+ case AArch64::ST4Fourv8b:
+ case AArch64::ST4Fourv8h:
+ case AArch64::ST4Fourv4h:
+ case AArch64::ST4Fourv4s:
+ case AArch64::ST4Fourv2s:
+ case AArch64::ST4Fourv2d:
+ // ZIP instructions
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[0], ZipDest[0])
+ .addReg(StReg[0])
+ .addReg(StReg[2]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[1], ZipDest[1])
+ .addReg(StReg[0], StRegKill[0])
+ .addReg(StReg[2], StRegKill[2]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[2], ZipDest[2])
+ .addReg(StReg[1])
+ .addReg(StReg[3]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[3], ZipDest[3])
+ .addReg(StReg[1], StRegKill[1])
+ .addReg(StReg[3], StRegKill[3]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[4], ZipDest[4])
+ .addReg(ZipDest[0])
+ .addReg(ZipDest[2]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[5], ZipDest[5])
+ .addReg(ZipDest[0])
+ .addReg(ZipDest[2]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[6], ZipDest[6])
+ .addReg(ZipDest[1])
+ .addReg(ZipDest[3]);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[7], ZipDest[7])
+ .addReg(ZipDest[1])
+ .addReg(ZipDest[3]);
+ // STP instructions
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[8])
+ .addReg(ZipDest[4])
+ .addReg(ZipDest[5])
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(MBB, MI, DL, *ReplInstrMCID[9])
+ .addReg(ZipDest[6])
+ .addReg(ZipDest[7])
+ .addReg(AddrReg)
+ .addImm(2);
+ break;
+ }
+
+ ++NumModifiedInstr;
+ return true;
+}
+
+/// Process the REG_SEQUENCE instruction, and extract the source
+/// operands of the ST2/ST4 instruction from it.
+/// Example of such an instruction:
+/// %dest = REG_SEQUENCE %st2_src1, dsub0, %st2_src2, dsub1;
+/// Return true when the instruction is processed successfully.
+bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI,
+ unsigned* StReg, unsigned* StRegKill, unsigned NumArg) const {
+ assert(DefiningMI != nullptr);
+ if (DefiningMI->getOpcode() != AArch64::REG_SEQUENCE)
+ return false;
+
+ for (unsigned i=0; i<NumArg; i++) {
+ StReg[i] = DefiningMI->getOperand(2*i+1).getReg();
+ StRegKill[i] = getKillRegState(DefiningMI->getOperand(2*i+1).isKill());
+
+ // Sanity check for the other arguments.
+ if (DefiningMI->getOperand(2*i+2).isImm()) {
+ switch (DefiningMI->getOperand(2*i+2).getImm()) {
+ default:
+ return false;
+
+ case AArch64::dsub0:
+ case AArch64::dsub1:
+ case AArch64::dsub2:
+ case AArch64::dsub3:
+ case AArch64::qsub0:
+ case AArch64::qsub1:
+ case AArch64::qsub2:
+ case AArch64::qsub3:
+ break;
+ }
+ }
+ else
+ return false;
+ }
+ return true;
+}
+
+/// Return the number of useful source registers for this instruction
+/// (2 for ST2 and 4 for ST4).
+unsigned AArch64SIMDInstrOpt::determineSrcReg(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unsupported instruction for this pass");
+
+ case AArch64::ST2Twov16b:
+ case AArch64::ST2Twov8b:
+ case AArch64::ST2Twov8h:
+ case AArch64::ST2Twov4h:
+ case AArch64::ST2Twov4s:
+ case AArch64::ST2Twov2s:
+ case AArch64::ST2Twov2d:
+ return 2;
+
+ case AArch64::ST4Fourv16b:
+ case AArch64::ST4Fourv8b:
+ case AArch64::ST4Fourv8h:
+ case AArch64::ST4Fourv4h:
+ case AArch64::ST4Fourv4s:
+ case AArch64::ST4Fourv2s:
+ case AArch64::ST4Fourv2d:
+ return 4;
+ }
+}
+
+bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ const AArch64InstrInfo *AAII =
+ static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ if (!AAII)
+ return false;
+ SchedModel.init(ST.getSchedModel(), &ST, AAII);
+ if (!SchedModel.hasInstrSchedModel())
+ return false;
+
+ bool Changed = false;
+ for (auto OptimizationKind : {VectorElem, Interleave}) {
+ if (!shouldExitEarly(&MF, OptimizationKind)) {
+ SmallVector<MachineInstr *, 8> RemoveMIs;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE;) {
+ MachineInstr &MI = *MII;
+ bool InstRewrite;
+ if (OptimizationKind == VectorElem)
+ InstRewrite = optimizeVectElement(MI);
+ else
+ InstRewrite = optimizeLdStInterleave(MI);
+ if (InstRewrite) {
+ // Add MI to the list of instructions to be removed given that it
+ // has been replaced.
+ RemoveMIs.push_back(&MI);
+ Changed = true;
+ }
+ ++MII;
+ }
+ }
+ for (MachineInstr *MI : RemoveMIs)
+ MI->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// Returns an instance of the high-cost ASIMD instruction replacement
+/// optimization pass.
+FunctionPass *llvm::createAArch64SIMDInstrOptPass() {
+ return new AArch64SIMDInstrOpt();
+}
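The ST2 rewrite implemented above relies on a simple data-layout fact: zip1/zip2 of the two source vectors, followed by a pair of adjacent contiguous stores (stp), writes exactly the interleaved memory image that st2 would have produced. The standalone C++ sketch below checks that equivalence for one 4-lane, 32-bit case; it is only an illustration of the data movement and is independent of the pass and of LLVM.

#include <array>
#include <cassert>
#include <cstdint>

// zip1/zip2 semantics for 4-lane vectors: interleave low and high halves.
using V4 = std::array<uint32_t, 4>;

static V4 zip1(V4 a, V4 b) { return {a[0], b[0], a[1], b[1]}; }
static V4 zip2(V4 a, V4 b) { return {a[2], b[2], a[3], b[3]}; }

int main() {
  V4 v0 = {0, 1, 2, 3}, v1 = {10, 11, 12, 13};

  // st2 {v0.4s, v1.4s}: elements of v0 and v1 are interleaved in memory.
  std::array<uint32_t, 8> St2;
  for (int i = 0; i < 4; ++i) {
    St2[2 * i] = v0[i];
    St2[2 * i + 1] = v1[i];
  }

  // Rewrite: zip1/zip2 followed by two adjacent contiguous stores (stp q2, q3).
  std::array<uint32_t, 8> Stp;
  V4 Lo = zip1(v0, v1), Hi = zip2(v0, v1);
  for (int i = 0; i < 4; ++i) {
    Stp[i] = Lo[i];
    Stp[4 + i] = Hi[i];
  }

  assert(St2 == Stp); // Same memory image either way.
  return 0;
}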
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
new file mode 100644
index 000000000000..bcd7b60875a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -0,0 +1,23 @@
+//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSVE] in {
+ defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
+ defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
+
+ defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">;
+ defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">;
+
+ defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">;
+ defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
index 18d000ace94c..90ebd78f4ab9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -26,6 +26,8 @@ def CortexA53Model : SchedMachineModel {
// Specification - Instruction Timings"
// v 1.0 Spreadsheet
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 5d1608ef04af..ade03f23f8c7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -31,6 +31,8 @@ def CortexA57Model : SchedMachineModel {
// experiments and benchmarking data.
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 9fd3ae6818e5..7a474ba8ef9b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -18,6 +18,8 @@ def CycloneModel : SchedMachineModel {
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 44fd94fc3d48..7277198b585f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -23,6 +23,8 @@ def FalkorModel : SchedMachineModel {
let LoadLatency = 3; // Optimistic load latency.
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index 4e491a04c78d..ce2afd499afb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -27,6 +27,8 @@ def KryoModel : SchedMachineModel {
// experiments and benchmarking data.
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
index 3b71cf8399a0..91b6ffcd7083 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
@@ -24,6 +24,8 @@ def ExynosM1Model : SchedMachineModel {
let LoadLatency = 4; // Optimistic load cases.
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
//===----------------------------------------------------------------------===//
@@ -62,39 +64,98 @@ let SchedModel = ExynosM1Model in {
let SchedModel = ExynosM1Model in {
//===----------------------------------------------------------------------===//
-// Coarse scheduling model for the Exynos-M1.
+// Predicates.
+
+def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
+ MI->getOperand(0).getReg() != AArch64::LR}]>;
+def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
+def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M1WriteAB : SchedWriteRes<[M1UnitALU,
+ M1UnitC]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteAC : SchedWriteRes<[M1UnitALU,
+ M1UnitALU,
+ M1UnitC]> { let Latency = 2;
+ let NumMicroOps = 3; }
+def M1WriteAD : SchedWriteRes<[M1UnitALU,
+ M1UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
-def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-
-def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
- M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteL5]>]>;
-
-def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
-def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; }
-def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
- M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteS1]>]>;
+def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
+def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
+ SchedVar<NoSchedPred, [M1WriteAC]>]>;
+
+def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
+def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
+def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
+ let ResourceCycles = [2]; }
+def M1WriteLB : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M1WriteLC : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M1WriteLD : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2]; }
+def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
+ SchedVar<NoSchedPred, [M1WriteLC]>]>;
+def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
+ SchedVar<NoSchedPred, [M1WriteLD]>]>;
+
+def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
+def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
+def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
+def M1WriteSA : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteSB : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M1WriteSC : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M1WriteSD : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteSE : SchedWriteRes<[M1UnitS,
+ M1UnitA]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
+ SchedVar<NoSchedPred, [M1WriteSE]>]>;
+def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
+ SchedVar<NoSchedPred, [M1WriteSB]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
// Branch instructions.
-// NOTE: Unconditional direct branches actually take neither cycles nor units.
-def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; }
+def : WriteRes<WriteBr, []> { let Latency = 0; }
def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
// Arithmetic and logical integer instructions.
def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
-// TODO: Shift over 3 and some extensions take 2 cycles.
def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
@@ -110,21 +171,24 @@ def : WriteRes<WriteID64, [M1UnitC,
M1UnitD]> { let Latency = 21;
let ResourceCycles = [1, 21]; }
// TODO: Long multiplication take 5 cycles and also the ALU.
-// TODO: Multiplication with accumulation can be advanced.
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
-// TODO: 64-bit multiplication has a throughput of 1/2.
-def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
// Miscellaneous instructions.
def : WriteRes<WriteExtr, [M1UnitALU,
- M1UnitALU]> { let Latency = 2; }
+ M1UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
-// TODO: The latency for the post or pre register is 1 cycle.
-def : WriteRes<WriteAdr, []> { let Latency = 0; }
+// Addressing modes.
+def : WriteRes<WriteAdr, []> { let Latency = 1;
+ let NumMicroOps = 0; }
+def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
// Load instructions.
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
-def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4;
+ let NumMicroOps = 0; }
def : SchedAlias<WriteLDIdx, M1WriteLX>;
// Store instructions.
@@ -135,25 +199,23 @@ def : SchedAlias<WriteSTIdx, M1WriteSX>;
// FP data instructions.
def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
-// TODO: FCCMP is much different.
def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
let ResourceCycles = [15]; }
def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
-// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
// FP load instructions.
-// TODO: ASIMD loads are much different.
-def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
+def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
// FP store instructions.
-// TODO: ASIMD stores are much different.
-def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
+def : WriteRes<WriteVST, [M1UnitS,
+ M1UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
// ASIMD FP instructions.
def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
@@ -165,55 +227,67 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
def : WriteRes<WriteSys, []> { let Latency = 1; }
//===----------------------------------------------------------------------===//
-// Generic fast forwarding.
+// Fast forwarding.
// TODO: Add FP register forwarding rules.
-
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
-// Integer multiply-accumulate.
-// TODO: The forwarding for WriteIM64 saves actually 3 cycles.
-def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+// TODO: The forwarding for WriteIM32 saves actually 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
-// Finer scheduling model for the Exynos-M1.
+// Finer scheduling model.
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
M1UnitNALU,
- M1UnitFADD]> { let Latency = 9; }
+ M1UnitFADD]> { let Latency = 9;
+ let NumMicroOps = 3; }
def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 5; }
+ M1UnitFST]> { let Latency = 5;
+ let NumMicroOps = 2; }
def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 6; }
+ M1UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
M1UnitFST,
- M1UnitL]> { let Latency = 10; }
+ M1UnitL]> { let Latency = 10;
+ let NumMicroOps = 3; }
def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
- M1UnitFST]> { let Latency = 8; }
+ M1UnitFST]> { let Latency = 8;
+ let NumMicroOps = 2; }
def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
M1UnitFST,
- M1UnitL]> { let Latency = 13; }
+ M1UnitL]> { let Latency = 13;
+ let NumMicroOps = 3; }
def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
- M1UnitFST]> { let Latency = 6; }
+ M1UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
- M1UnitFST]> { let Latency = 3; }
+ M1UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
- M1UnitL]> { let Latency = 9; }
+ M1UnitL]> { let Latency = 9;
+ let NumMicroOps = 2; }
def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
- M1UnitFMAC]> { let Latency = 6; }
+ M1UnitFMAC]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
- M1UnitFMAC]> { let Latency = 7; }
+ M1UnitFMAC]> { let Latency = 7;
+ let NumMicroOps = 2; }
+def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
+// TODO
def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
let ResourceCycles = [15]; }
def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
@@ -230,75 +304,93 @@ def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
def M1WriteTB : SchedWriteRes<[M1UnitC,
- M1UnitALU]> { let Latency = 2; }
+ M1UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
def M1WriteVLDA : SchedWriteRes<[M1UnitL,
- M1UnitL]> { let Latency = 6; }
+ M1UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M1WriteVLDB : SchedWriteRes<[M1UnitL,
M1UnitL,
- M1UnitL]> { let Latency = 7; }
+ M1UnitL]> { let Latency = 7;
+ let NumMicroOps = 3; }
def M1WriteVLDC : SchedWriteRes<[M1UnitL,
M1UnitL,
M1UnitL,
- M1UnitL]> { let Latency = 8; }
+ M1UnitL]> { let Latency = 8;
+ let NumMicroOps = 4; }
def M1WriteVLDD : SchedWriteRes<[M1UnitL,
M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 2;
let ResourceCycles = [2]; }
def M1WriteVLDE : SchedWriteRes<[M1UnitL,
- M1UnitNALU]> { let Latency = 6; }
+ M1UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M1WriteVLDF : SchedWriteRes<[M1UnitL,
M1UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
let ResourceCycles = [5]; }
def M1WriteVLDG : SchedWriteRes<[M1UnitL,
M1UnitNALU,
M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 3;
let ResourceCycles = [2]; }
def M1WriteVLDH : SchedWriteRes<[M1UnitL,
M1UnitNALU,
- M1UnitNALU]> { let Latency = 6; }
+ M1UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 3; }
def M1WriteVLDI : SchedWriteRes<[M1UnitL,
M1UnitL,
M1UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
let ResourceCycles = [6]; }
def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
M1UnitNALU,
M1UnitNALU,
M1UnitNALU]> { let Latency = 9;
+ let NumMicroOps = 4;
let ResourceCycles = [4]; }
def M1WriteVLDK : SchedWriteRes<[M1UnitL,
M1UnitNALU,
M1UnitNALU,
M1UnitNALU,
M1UnitNALU]> { let Latency = 9;
+ let NumMicroOps = 5;
let ResourceCycles = [4]; }
def M1WriteVLDL : SchedWriteRes<[M1UnitL,
M1UnitNALU,
M1UnitNALU,
+ M1UnitL,
M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 5;
let ResourceCycles = [2]; }
def M1WriteVLDM : SchedWriteRes<[M1UnitL,
M1UnitNALU,
M1UnitNALU,
+ M1UnitL,
M1UnitNALU,
M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 6;
let ResourceCycles = [2]; }
def M1WriteVLDN : SchedWriteRes<[M1UnitL,
M1UnitL,
M1UnitL,
M1UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
let ResourceCycles = [7]; }
-
def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
def M1WriteVSTD : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitFST]> { let Latency = 7;
+ let NumMicroOps = 2;
let ResourceCycles = [7]; }
def M1WriteVSTE : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitFST]> { let Latency = 8;
+ let NumMicroOps = 3;
let ResourceCycles = [8]; }
def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
M1UnitS,
@@ -307,6 +399,7 @@ def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 15;
+ let NumMicroOps = 5;
let ResourceCycles = [15]; }
def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
M1UnitS,
@@ -317,12 +410,14 @@ def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 16;
+ let NumMicroOps = 6;
let ResourceCycles = [16]; }
def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
let ResourceCycles = [14]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
@@ -335,27 +430,30 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitFST,
M1UnitFST,
M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
let ResourceCycles = [17]; }
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
-// NOTE: Conditional branch and link adds a B uop.
def : InstRW<[M1WriteA1], (instrs BL)>;
-// NOTE: Indirect branch and link with LR adds an ALU uop.
-def : InstRW<[M1WriteA1,
- M1WriteC1], (instrs BLR)>;
+def : InstRW<[M1WriteBX], (instrs BLR)>;
def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
-def : InstRW<[M1WriteC1,
- M1WriteA2], (instregex "^TBN?Z[WX]")>;
+def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
def : InstRW<[M1WriteA1], (instrs COPY)>;
+def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
// Load instructions.
+def : InstRW<[M1WriteLB,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M1WriteLX,
+ ReadAdrBase], (instregex "^PRFMro[WX]")>;
// Store instructions.
@@ -375,16 +473,51 @@ def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
// FP miscellaneous instructions.
-def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
-def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
-def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
-def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
-def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
-def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
+def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
+def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
+def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
+def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
+def : InstRW<[M1WriteLY,
+ ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDRQro[WX]")>;
+def : InstRW<[WriteVLD,
+ M1WriteLH], (instregex "^LDN?P[DS]i")>;
+def : InstRW<[M1WriteLA,
+ M1WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M1WriteLC,
+ M1WriteLH,
+ WriteAdr], (instregex "^LDP[DS](post|pre)")>;
+def : InstRW<[M1WriteLD,
+ M1WriteLH,
+ WriteAdr], (instregex "^LDPQ(post|pre)")>;
// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
+def : InstRW<[M1WriteSY,
+ ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteSB,
+ ReadAdrBase], (instregex "^STRQro[WX]")>;
+def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[DS](post|pre)")>;
+def : InstRW<[M1WriteSC,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
@@ -409,10 +542,12 @@ def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
-def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
-def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
// ASIMD FP instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
@@ -435,13 +570,16 @@ def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
// ASIMD miscellaneous instructions.
def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
-def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>;
+def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>;
def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
-def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
-def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>;
-def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
+def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
+def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>;
+def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
@@ -459,7 +597,7 @@ def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
(instregex "^TB[LX]v16i8Four")>;
def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
-def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
+def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index 9a0cb702518d..585688aae279 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -25,6 +25,8 @@ def ThunderXT8XModel : SchedMachineModel {
let MispredictPenalty = 8; // Branch mispredict penalty.
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
@@ -239,20 +241,20 @@ def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
//---
// Branch
//---
-def : InstRW<[THXT8XWriteBR], (instregex "^B")>;
-def : InstRW<[THXT8XWriteBR], (instregex "^BL")>;
-def : InstRW<[THXT8XWriteBR], (instregex "^B.*")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
-def : InstRW<[THXT8XWriteBRR], (instregex "^BR")>;
-def : InstRW<[THXT8XWriteBRR], (instregex "^BLR")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;
//---
// Ret
//---
-def : InstRW<[THXT8XWriteRET], (instregex "^RET")>;
+def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>;
//---
// Miscellaneous
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 10df50bcf156..22f272edd680 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -22,9 +22,11 @@ def ThunderX2T99Model : SchedMachineModel {
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
// Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 32;
+ let LoopMicroOpBufferSize = 128;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
}
// Define the issue ports.
@@ -315,6 +317,36 @@ def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let NumMicroOps = 3;
}
+// 8 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+// 12 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+
+// 16 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 16;
+ let NumMicroOps = 8;
+}
+
+// 24 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 24;
+ let NumMicroOps = 12;
+}
+
+// 32 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 32;
+ let NumMicroOps = 16;
+}
+
// Define commonly used read types.
// No forwarding is provided for these types.
@@ -368,7 +400,7 @@ def : WriteRes<WriteAtomic, []> {
//---
def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>;
-def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B..$")>;
def : InstRW<[THX2T99Write_1Cyc_I2],
(instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
@@ -1741,5 +1773,108 @@ def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
+// V8.1a Atomics (LSE)
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+ LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+ LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+ LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+ LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+ LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+ LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+ LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+ LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+ LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+ LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+ LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+ LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
} // SchedModel = ThunderX2T99Model
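The block above gives the ThunderX2 T99 model costs for the ARMv8.1-A LSE atomics (CAS*, LD<op>*, SWP*, STLLR*), with the acquire, release, and acquire-release forms priced progressively higher than the relaxed ones. As a rough illustration only, this is the kind of C++ that typically lowers to those instructions when compiled for an LSE-capable target (for example with -march=armv8.1-a+lse); the exact instruction selection depends on the compiler and options:

  #include <atomic>

  std::atomic<long> Counter{0};

  long bump(long V) {
    // Relaxed read-modify-write: typically selects LDADD.
    return Counter.fetch_add(V, std::memory_order_relaxed);
  }

  long grab(long V) {
    // Acquire-release exchange: typically selects SWPAL.
    return Counter.exchange(V, std::memory_order_acq_rel);
  }

  bool claim(long V) {
    long Expected = 0;
    // Sequentially consistent compare-and-swap: typically selects CASAL.
    return Counter.compare_exchange_strong(Expected, V);
  }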
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index fe984ccbaf1d..571e61d7083c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -16,10 +16,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -120,7 +120,7 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index ea6112452736..e397d585ae77 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -18,19 +18,12 @@
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "AArch64CallLowering.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
-#include "llvm/CodeGen/GlobalISel/Legalizer.h"
-#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
-#endif
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -98,6 +91,11 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 8;
break;
+ case Saphira:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
@@ -130,93 +128,55 @@ void AArch64Subtarget::initializeProperties() {
MinVectorRegisterBitWidth = 128;
break;
case CortexA35: break;
- case CortexA53: break;
- case CortexA72:
- PrefFunctionAlignment = 4;
+ case CortexA53:
+ PrefFunctionAlignment = 3;
break;
+ case CortexA55: break;
+ case CortexA72:
case CortexA73:
+ case CortexA75:
PrefFunctionAlignment = 4;
break;
case Others: break;
}
}
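One detail worth keeping in mind when reading the tuning hunk above: in this revision PrefFunctionAlignment is expressed as log2 of the byte alignment, so 3 for Cortex-A53 requests 8-byte function alignment and 4 for Cortex-A72/A73/A75 requests 16 bytes. A trivial sketch of the conversion (the helper name is made up for illustration):

  #include <cstdio>

  // PrefFunctionAlignment stores log2 of the alignment in bytes.
  unsigned alignmentInBytes(unsigned Log2Align) { return 1u << Log2Align; }

  int main() {
    std::printf("Cortex-A53:         %u bytes\n", alignmentInBytes(3));
    std::printf("Cortex-A72/A73/A75: %u bytes\n", alignmentInBytes(4));
  }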
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct AArch64GISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
- const CallLowering *getCallLowering() const override {
- return CallLoweringInfo.get();
- }
-
- const InstructionSelector *getInstructionSelector() const override {
- return InstSelector.get();
- }
-
- const LegalizerInfo *getLegalizerInfo() const override {
- return Legalizer.get();
- }
-
- const RegisterBankInfo *getRegBankInfo() const override {
- return RegBankInfo.get();
- }
-};
-
-} // end anonymous namespace
-#endif
-
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS),
- ReserveX18(TT.isOSDarwin() || TT.isOSWindows()),
- IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
+ ReserveX18(TT.isOSDarwin() || TT.isOSWindows()), IsLittle(LittleEndian),
+ TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
- TLInfo(TM, *this), GISel() {
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *AArch64GISel = new GISelAccessor();
-#else
- AArch64GISelActualAccessor *AArch64GISel = new AArch64GISelActualAccessor();
- AArch64GISel->CallLoweringInfo.reset(
- new AArch64CallLowering(*getTargetLowering()));
- AArch64GISel->Legalizer.reset(new AArch64LegalizerInfo());
+ TLInfo(TM, *this) {
+ CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
+ Legalizer.reset(new AArch64LegalizerInfo(*this));
auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
// FIXME: At this point, we can't rely on Subtarget having RBI.
// It's awkward to mix passing RBI and the Subtarget; should we pass
// TII/TRI as well?
- AArch64GISel->InstSelector.reset(createAArch64InstructionSelector(
+ InstSelector.reset(createAArch64InstructionSelector(
*static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
- AArch64GISel->RegBankInfo.reset(RBI);
-#endif
- setGISelAccessor(*AArch64GISel);
+ RegBankInfo.reset(RBI);
}
const CallLowering *AArch64Subtarget::getCallLowering() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getCallLowering();
+ return CallLoweringInfo.get();
}
const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getInstructionSelector();
+ return InstSelector.get();
}
const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getLegalizerInfo();
+ return Legalizer.get();
}
const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getRegBankInfo();
+ return RegBankInfo.get();
}
/// Find the target operand flags that describe how a global value should be
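With GISelAccessor gone, the subtarget now owns the four GlobalISel components directly and the accessors reduce to unique_ptr::get() calls, with no null checks or LLVM_BUILD_GLOBAL_ISEL guards left. A minimal sketch of the resulting ownership pattern, with placeholder types standing in for CallLowering, LegalizerInfo, and friends:

  #include <memory>

  // Placeholder stand-ins for the GlobalISel interfaces owned by the subtarget.
  struct CallLoweringStub {};
  struct LegalizerStub {};

  class SubtargetSketch {
    std::unique_ptr<CallLoweringStub> CallLoweringInfo;
    std::unique_ptr<LegalizerStub> Legalizer;

  public:
    SubtargetSketch()
        : CallLoweringInfo(std::make_unique<CallLoweringStub>()),
          Legalizer(std::make_unique<LegalizerStub>()) {}

    // Accessors simply expose the owned objects; GlobalISel is now built
    // unconditionally, so no accessor indirection is needed.
    const CallLoweringStub *getCallLowering() const { return CallLoweringInfo.get(); }
    const LegalizerStub *getLegalizerInfo() const { return Legalizer.get(); }
  };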
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5a1f45ee2552..5d9759d363dd 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -19,9 +19,12 @@
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -38,13 +41,16 @@ public:
Others,
CortexA35,
CortexA53,
+ CortexA55,
CortexA57,
CortexA72,
CortexA73,
+ CortexA75,
Cyclone,
ExynosM1,
Falkor,
Kryo,
+ Saphira,
ThunderX2T99,
ThunderX,
ThunderXT81,
@@ -58,10 +64,12 @@ protected:
bool HasV8_1aOps = false;
bool HasV8_2aOps = false;
+ bool HasV8_3aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
+ bool HasDotProd = false;
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
@@ -71,12 +79,14 @@ protected:
bool HasSPE = false;
bool HasLSLFast = false;
bool HasSVE = false;
+ bool HasRCPC = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
+ bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
@@ -94,6 +104,7 @@ protected:
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
bool Paired128IsSlow = false;
+ bool STRQroIsSlow = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
@@ -124,10 +135,12 @@ protected:
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
AArch64TargetLowering TLInfo;
- /// Gather the accessor points to GlobalISel-related APIs.
- /// This is used to avoid ifndefs spreading around while GISel is
- /// an optional library.
- std::unique_ptr<GISelAccessor> GISel;
+
+ /// GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
@@ -146,11 +159,6 @@ public:
const std::string &FS, const TargetMachine &TM,
bool LittleEndian);
- /// This object will take onwership of \p GISelAccessor.
- void setGISelAccessor(GISelAccessor &GISel) {
- this->GISel.reset(&GISel);
- }
-
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -184,11 +192,16 @@ public:
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
+ bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasZeroCycleZeroingFPWorkaround() const {
+ return HasZeroCycleZeroingFPWorkaround;
+ }
+
bool requiresStrictAlign() const { return StrictAlign; }
bool isXRaySupported() const override { return true; }
@@ -201,6 +214,7 @@ public:
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
+ bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
@@ -212,6 +226,7 @@ public:
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
bool isPaired128Slow() const { return Paired128IsSlow; }
+ bool isSTRQroSlow() const { return STRQroIsSlow; }
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
@@ -253,6 +268,7 @@ public:
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
+ bool hasRCPC() const { return HasRCPC; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 7c5dcb0853eb..df939add70fa 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -342,6 +342,9 @@ def : ROSysReg<"ID_ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b010>;
def : ROSysReg<"ID_ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b011>;
def : ROSysReg<"ID_ISAR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b100>;
def : ROSysReg<"ID_ISAR5_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b101>;
+def : ROSysReg<"ID_ISAR6_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b111> {
+ let Requires = [{ {AArch64::HasV8_2aOps} }];
+}
def : ROSysReg<"ID_AA64PFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b000>;
def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>;
def : ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>;
@@ -1016,6 +1019,21 @@ def : RWSysReg<"VDISR_EL2", 0b11, 0b100, 0b1100, 0b0001, 0b001>;
def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
}
+// v8.3a "Pointer authentication extension" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
+def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
+def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
+def : RWSysReg<"APIBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b011>;
+def : RWSysReg<"APDAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b000>;
+def : RWSysReg<"APDAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b001>;
+def : RWSysReg<"APDBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b010>;
+def : RWSysReg<"APDBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b011>;
+def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
+def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
+}
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcCyclone} }] in
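The new v8.3-a key registers above are identified, like every AArch64 system register, by the (Op0, Op1, CRn, CRm, Op2) tuple shown in the columns; tools that lack the symbolic name can usually fall back to the generic S<op0>_<op1>_C<CRn>_C<CRm>_<op2> spelling (APIAKeyLo_EL1, for example, is S3_0_C2_C1_0). A small sketch that formats the generic name from the fields (the helper name is illustrative):

  #include <cstdio>
  #include <string>

  // Build the generic "S<op0>_<op1>_C<CRn>_C<CRm>_<op2>" system-register name.
  std::string genericSysRegName(unsigned Op0, unsigned Op1, unsigned CRn,
                                unsigned CRm, unsigned Op2) {
    char Buf[32];
    std::snprintf(Buf, sizeof(Buf), "S%u_%u_C%u_C%u_%u", Op0, Op1, CRn, CRm, Op2);
    return Buf;
  }

  int main() {
    // APIAKeyLo_EL1 from the table above: 0b11, 0b000, 0b0010, 0b0001, 0b000.
    std::printf("%s\n", genericSysRegName(3, 0, 2, 1, 0).c_str()); // S3_0_C2_C1_0
  }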
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index ba28c01a2eff..64583ead73f2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
@@ -157,7 +157,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
- initializeAArch64VectorByElementOptPass(*PR);
+ initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
@@ -206,20 +206,42 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
+static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
+ Optional<CodeModel::Model> CM,
+ bool JIT) {
+ if (CM) {
+ if (*CM != CodeModel::Small && *CM != CodeModel::Large) {
+ if (!TT.isOSFuchsia())
+ report_fatal_error(
+ "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Kernel)
+ report_fatal_error(
+ "Only small, kernel, and large code models are allowed on AArch64");
+ }
+ return *CM;
+ }
+ // The default MCJIT memory managers make no guarantees about where they can
+ // find an executable page; JITed code needs to be able to refer to globals
+ // no matter how far away they are.
+ if (JIT)
+ return CodeModel::Large;
+ return CodeModel::Small;
+}
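For reference, the decision implemented above reduces to: an explicit small or large model is always honoured, kernel is additionally accepted on Fuchsia, any other explicit model is a fatal error, and when no model is given, JIT compilation defaults to large (so MCJIT'ed code can reach arbitrarily distant globals) while everything else defaults to small. A standalone sketch of the same table, with simplified stand-in types rather than the LLVM API:

  #include <optional>
  #include <stdexcept>

  enum class CM { Small, Kernel, Large };

  // Mirrors the AArch64 policy above; names are illustrative only.
  CM effectiveCodeModel(std::optional<CM> Requested, bool IsFuchsia, bool IsJIT) {
    if (Requested) {
      if (*Requested == CM::Small || *Requested == CM::Large)
        return *Requested;
      if (IsFuchsia && *Requested == CM::Kernel)
        return *Requested;
      throw std::runtime_error("unsupported code model for AArch64");
    }
    // JITed code may need to reach globals that are arbitrarily far away.
    return IsJIT ? CM::Large : CM::Small;
  }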
+
/// Create an AArch64 architecture model.
///
-AArch64TargetMachine::AArch64TargetMachine(
- const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Optional<Reloc::Model> RM,
- CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian)
- // This nested ternary is horrible, but DL needs to be properly
- // initialized before TLInfo is constructed.
- : LLVMTargetMachine(T, computeDataLayout(TT, Options.MCOptions,
- LittleEndian),
- TT, CPU, FS, Options,
- getEffectiveRelocModel(TT, RM), CM, OL),
- TLOF(createTLOF(getTargetTriple())),
- isLittle(LittleEndian) {
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT,
+ bool LittleEndian)
+ : LLVMTargetMachine(T,
+ computeDataLayout(TT, Options.MCOptions, LittleEndian),
+ TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
+ getEffectiveCodeModel(TT, CM, JIT), OL),
+ TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
}
@@ -254,16 +276,16 @@ void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::AArch64leTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Optional<Reloc::Model> RM,
- CodeModel::Model CM, CodeGenOpt::Level OL)
- : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
void AArch64beTargetMachine::anchor() { }
AArch64beTargetMachine::AArch64beTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Optional<Reloc::Model> RM,
- CodeModel::Model CM, CodeGenOpt::Level OL)
- : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
namespace {
@@ -308,13 +330,11 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
-#endif
bool addILPOpts() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -345,7 +365,7 @@ void AArch64PassConfig::addIRPasses() {
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
- addPass(createCFGSimplificationPass());
+ addPass(createCFGSimplificationPass(1, true, true, false, true));
// Run LoopDataPrefetch
//
@@ -410,7 +430,6 @@ bool AArch64PassConfig::addInstSelector() {
return false;
}
-#ifdef LLVM_BUILD_GLOBAL_ISEL
bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
@@ -436,7 +455,6 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
}
-#endif
bool AArch64PassConfig::isGlobalISelEnabled() const {
return TM->getOptLevel() <= EnableGlobalISelAtO;
@@ -455,7 +473,7 @@ bool AArch64PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
- addPass(createAArch64VectorByElementOptPass());
+ addPass(createAArch64SIMDInstrOptPass());
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 85de02e859e0..2bbfb2da3db6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -31,13 +31,14 @@ protected:
public:
AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool IsLittleEndian);
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT, bool IsLittleEndian);
~AArch64TargetMachine() override;
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
- // The no argument getSubtargetImpl, while it exists on some, targets is
- // deprecated and should not be used.
+ // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget,
+ // subtargets are per-function entities based on the target-specific
+ // attributes of each function.
const AArch64Subtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
@@ -61,8 +62,9 @@ class AArch64leTargetMachine : public AArch64TargetMachine {
public:
AArch64leTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ bool JIT);
};
// AArch64 big endian target machine.
@@ -72,8 +74,9 @@ class AArch64beTargetMachine : public AArch64TargetMachine {
public:
AArch64beTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ bool JIT);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
index 9077eb7902fd..f081d7caba67 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -10,8 +10,8 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
class AArch64TargetMachine;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a76f080530bb..1820ad959fcb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -12,9 +12,10 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 31c037354925..08c693ff38a8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -1,4 +1,4 @@
-//===-- AArch64TargetTransformInfo.h - AArch64 specific TTI -----*- C++ -*-===//
+//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,17 +18,31 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#include "AArch64.h"
+#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/Target/TargetLowering.h"
-#include <algorithm>
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include <cstdint>
namespace llvm {
+class APInt;
+class Instruction;
+class IntrinsicInst;
+class Loop;
+class SCEV;
+class ScalarEvolution;
+class Type;
+class Value;
+class VectorType;
+
class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
- typedef BasicTTIImplBase<AArch64TTIImpl> BaseT;
- typedef TargetTransformInfo TTI;
+ using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
+ using TTI = TargetTransformInfo;
+
friend BaseT;
const AArch64Subtarget *ST;
@@ -157,4 +171,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
deleted file mode 100644
index f53af2315ec9..000000000000
--- a/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-//=- AArch64VectorByElementOpt.cpp - AArch64 vector by element inst opt pass =//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that performs optimization for vector by element
-// SIMD instructions.
-//
-// Certain SIMD instructions with vector element operand are not efficient.
-// Rewrite them into SIMD instructions with vector operands. This rewrite
-// is driven by the latency of the instructions.
-//
-// Example:
-// fmla v0.4s, v1.4s, v2.s[1]
-// is rewritten into
-// dup v3.4s, v2.s[1]
-// fmla v0.4s, v1.4s, v3.4s
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64InstrInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCSchedule.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <map>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "aarch64-vectorbyelement-opt"
-
-STATISTIC(NumModifiedInstr,
- "Number of vector by element instructions modified");
-
-#define AARCH64_VECTOR_BY_ELEMENT_OPT_NAME \
- "AArch64 vector by element instruction optimization pass"
-
-namespace {
-
-struct AArch64VectorByElementOpt : public MachineFunctionPass {
- static char ID;
-
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- TargetSchedModel SchedModel;
-
- AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
- initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
- }
-
- /// Based only on latency of instructions, determine if it is cost efficient
- /// to replace the instruction InstDesc by the two instructions InstDescRep1
- /// and InstDescRep2.
- /// Return true if replacement is recommended.
- bool
- shouldReplaceInstruction(MachineFunction *MF, const MCInstrDesc *InstDesc,
- const MCInstrDesc *InstDescRep1,
- const MCInstrDesc *InstDescRep2,
- std::map<unsigned, bool> &VecInstElemTable) const;
-
- /// Determine if we need to exit the vector by element instruction
- /// optimization pass early. This makes sure that Targets with no need
- /// for this optimization do not spent any compile time on this pass.
- /// This check is done by comparing the latency of an indexed FMLA
- /// instruction to the latency of the DUP + the latency of a vector
- /// FMLA instruction. We do not check on other related instructions such
- /// as FMLS as we assume that if the situation shows up for one
- /// instruction, then it is likely to show up for the related ones.
- /// Return true if early exit of the pass is recommended.
- bool earlyExitVectElement(MachineFunction *MF);
-
- /// Check whether an equivalent DUP instruction has already been
- /// created or not.
- /// Return true when the dup instruction already exists. In this case,
- /// DestReg will point to the destination of the already created DUP.
- bool reuseDUP(MachineInstr &MI, unsigned DupOpcode, unsigned SrcReg,
- unsigned LaneNumber, unsigned *DestReg) const;
-
- /// Certain SIMD instructions with vector element operand are not efficient.
- /// Rewrite them into SIMD instructions with vector operands. This rewrite
- /// is driven by the latency of the instructions.
- /// Return true if the SIMD instruction is modified.
- bool optimizeVectElement(MachineInstr &MI,
- std::map<unsigned, bool> *VecInstElemTable) const;
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- StringRef getPassName() const override {
- return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
- }
-};
-
-char AArch64VectorByElementOpt::ID = 0;
-
-} // end anonymous namespace
-
-INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt",
- AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
-
-/// Based only on latency of instructions, determine if it is cost efficient
-/// to replace the instruction InstDesc by the two instructions InstDescRep1
-/// and InstDescRep2. Note that it is assumed in this fuction that an
-/// instruction of type InstDesc is always replaced by the same two
-/// instructions as results are cached here.
-/// Return true if replacement is recommended.
-bool AArch64VectorByElementOpt::shouldReplaceInstruction(
- MachineFunction *MF, const MCInstrDesc *InstDesc,
- const MCInstrDesc *InstDescRep1, const MCInstrDesc *InstDescRep2,
- std::map<unsigned, bool> &VecInstElemTable) const {
- // Check if replacment decision is alredy available in the cached table.
- // if so, return it.
- if (!VecInstElemTable.empty() &&
- VecInstElemTable.find(InstDesc->getOpcode()) != VecInstElemTable.end())
- return VecInstElemTable[InstDesc->getOpcode()];
-
- unsigned SCIdx = InstDesc->getSchedClass();
- unsigned SCIdxRep1 = InstDescRep1->getSchedClass();
- unsigned SCIdxRep2 = InstDescRep2->getSchedClass();
- const MCSchedClassDesc *SCDesc =
- SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
- const MCSchedClassDesc *SCDescRep1 =
- SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep1);
- const MCSchedClassDesc *SCDescRep2 =
- SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep2);
-
- // If a subtarget does not define resources for any of the instructions
- // of interest, then return false for no replacement.
- if (!SCDesc->isValid() || SCDesc->isVariant() || !SCDescRep1->isValid() ||
- SCDescRep1->isVariant() || !SCDescRep2->isValid() ||
- SCDescRep2->isVariant()) {
- VecInstElemTable[InstDesc->getOpcode()] = false;
- return false;
- }
-
- if (SchedModel.computeInstrLatency(InstDesc->getOpcode()) >
- SchedModel.computeInstrLatency(InstDescRep1->getOpcode()) +
- SchedModel.computeInstrLatency(InstDescRep2->getOpcode())) {
- VecInstElemTable[InstDesc->getOpcode()] = true;
- return true;
- }
- VecInstElemTable[InstDesc->getOpcode()] = false;
- return false;
-}
-
-/// Determine if we need to exit the vector by element instruction
-/// optimization pass early. This makes sure that Targets with no need
-/// for this optimization do not spent any compile time on this pass.
-/// This check is done by comparing the latency of an indexed FMLA
-/// instruction to the latency of the DUP + the latency of a vector
-/// FMLA instruction. We do not check on other related instructions such
-/// as FMLS as we assume that if the situation shows up for one
-/// instruction, then it is likely to show up for the related ones.
-/// Return true if early exit of the pass is recommended.
-bool AArch64VectorByElementOpt::earlyExitVectElement(MachineFunction *MF) {
- std::map<unsigned, bool> VecInstElemTable;
- const MCInstrDesc *IndexMulMCID = &TII->get(AArch64::FMLAv4i32_indexed);
- const MCInstrDesc *DupMCID = &TII->get(AArch64::DUPv4i32lane);
- const MCInstrDesc *MulMCID = &TII->get(AArch64::FMULv4f32);
-
- if (!shouldReplaceInstruction(MF, IndexMulMCID, DupMCID, MulMCID,
- VecInstElemTable))
- return true;
- return false;
-}
-
-/// Check whether an equivalent DUP instruction has already been
-/// created or not.
-/// Return true when the dup instruction already exists. In this case,
-/// DestReg will point to the destination of the already created DUP.
-bool AArch64VectorByElementOpt::reuseDUP(MachineInstr &MI, unsigned DupOpcode,
- unsigned SrcReg, unsigned LaneNumber,
- unsigned *DestReg) const {
- for (MachineBasicBlock::iterator MII = MI, MIE = MI.getParent()->begin();
- MII != MIE;) {
- MII--;
- MachineInstr *CurrentMI = &*MII;
-
- if (CurrentMI->getOpcode() == DupOpcode &&
- CurrentMI->getNumOperands() == 3 &&
- CurrentMI->getOperand(1).getReg() == SrcReg &&
- CurrentMI->getOperand(2).getImm() == LaneNumber) {
- *DestReg = CurrentMI->getOperand(0).getReg();
- return true;
- }
- }
-
- return false;
-}
-
-/// Certain SIMD instructions with vector element operand are not efficient.
-/// Rewrite them into SIMD instructions with vector operands. This rewrite
-/// is driven by the latency of the instructions.
-/// The instruction of concerns are for the time being fmla, fmls, fmul,
-/// and fmulx and hence they are hardcoded.
-///
-/// Example:
-/// fmla v0.4s, v1.4s, v2.s[1]
-/// is rewritten into
-/// dup v3.4s, v2.s[1] // dup not necessary if redundant
-/// fmla v0.4s, v1.4s, v3.4s
-/// Return true if the SIMD instruction is modified.
-bool AArch64VectorByElementOpt::optimizeVectElement(
- MachineInstr &MI, std::map<unsigned, bool> *VecInstElemTable) const {
- const MCInstrDesc *MulMCID, *DupMCID;
- const TargetRegisterClass *RC = &AArch64::FPR128RegClass;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // 4X32 instructions
- case AArch64::FMLAv4i32_indexed:
- DupMCID = &TII->get(AArch64::DUPv4i32lane);
- MulMCID = &TII->get(AArch64::FMLAv4f32);
- break;
- case AArch64::FMLSv4i32_indexed:
- DupMCID = &TII->get(AArch64::DUPv4i32lane);
- MulMCID = &TII->get(AArch64::FMLSv4f32);
- break;
- case AArch64::FMULXv4i32_indexed:
- DupMCID = &TII->get(AArch64::DUPv4i32lane);
- MulMCID = &TII->get(AArch64::FMULXv4f32);
- break;
- case AArch64::FMULv4i32_indexed:
- DupMCID = &TII->get(AArch64::DUPv4i32lane);
- MulMCID = &TII->get(AArch64::FMULv4f32);
- break;
-
- // 2X64 instructions
- case AArch64::FMLAv2i64_indexed:
- DupMCID = &TII->get(AArch64::DUPv2i64lane);
- MulMCID = &TII->get(AArch64::FMLAv2f64);
- break;
- case AArch64::FMLSv2i64_indexed:
- DupMCID = &TII->get(AArch64::DUPv2i64lane);
- MulMCID = &TII->get(AArch64::FMLSv2f64);
- break;
- case AArch64::FMULXv2i64_indexed:
- DupMCID = &TII->get(AArch64::DUPv2i64lane);
- MulMCID = &TII->get(AArch64::FMULXv2f64);
- break;
- case AArch64::FMULv2i64_indexed:
- DupMCID = &TII->get(AArch64::DUPv2i64lane);
- MulMCID = &TII->get(AArch64::FMULv2f64);
- break;
-
- // 2X32 instructions
- case AArch64::FMLAv2i32_indexed:
- RC = &AArch64::FPR64RegClass;
- DupMCID = &TII->get(AArch64::DUPv2i32lane);
- MulMCID = &TII->get(AArch64::FMLAv2f32);
- break;
- case AArch64::FMLSv2i32_indexed:
- RC = &AArch64::FPR64RegClass;
- DupMCID = &TII->get(AArch64::DUPv2i32lane);
- MulMCID = &TII->get(AArch64::FMLSv2f32);
- break;
- case AArch64::FMULXv2i32_indexed:
- RC = &AArch64::FPR64RegClass;
- DupMCID = &TII->get(AArch64::DUPv2i32lane);
- MulMCID = &TII->get(AArch64::FMULXv2f32);
- break;
- case AArch64::FMULv2i32_indexed:
- RC = &AArch64::FPR64RegClass;
- DupMCID = &TII->get(AArch64::DUPv2i32lane);
- MulMCID = &TII->get(AArch64::FMULv2f32);
- break;
- }
-
- if (!shouldReplaceInstruction(MI.getParent()->getParent(),
- &TII->get(MI.getOpcode()), DupMCID, MulMCID,
- *VecInstElemTable))
- return false;
-
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock &MBB = *MI.getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- // get the operands of the current SIMD arithmetic instruction.
- unsigned MulDest = MI.getOperand(0).getReg();
- unsigned SrcReg0 = MI.getOperand(1).getReg();
- unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill());
- unsigned SrcReg1 = MI.getOperand(2).getReg();
- unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill());
- unsigned DupDest;
-
- // Instructions of interest have either 4 or 5 operands.
- if (MI.getNumOperands() == 5) {
- unsigned SrcReg2 = MI.getOperand(3).getReg();
- unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill());
- unsigned LaneNumber = MI.getOperand(4).getImm();
-
- // Create a new DUP instruction. Note that if an equivalent DUP instruction
- // has already been created before, then use that one instread of creating
- // a new one.
- if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) {
- DupDest = MRI.createVirtualRegister(RC);
- BuildMI(MBB, MI, DL, *DupMCID, DupDest)
- .addReg(SrcReg2, Src2IsKill)
- .addImm(LaneNumber);
- }
- BuildMI(MBB, MI, DL, *MulMCID, MulDest)
- .addReg(SrcReg0, Src0IsKill)
- .addReg(SrcReg1, Src1IsKill)
- .addReg(DupDest, Src2IsKill);
- } else if (MI.getNumOperands() == 4) {
- unsigned LaneNumber = MI.getOperand(3).getImm();
- if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) {
- DupDest = MRI.createVirtualRegister(RC);
- BuildMI(MBB, MI, DL, *DupMCID, DupDest)
- .addReg(SrcReg1, Src1IsKill)
- .addImm(LaneNumber);
- }
- BuildMI(MBB, MI, DL, *MulMCID, MulDest)
- .addReg(SrcReg0, Src0IsKill)
- .addReg(DupDest, Src1IsKill);
- } else {
- return false;
- }
-
- ++NumModifiedInstr;
- return true;
-}
-
-bool AArch64VectorByElementOpt::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
- return false;
-
- TII = MF.getSubtarget().getInstrInfo();
- MRI = &MF.getRegInfo();
- const TargetSubtargetInfo &ST = MF.getSubtarget();
- const AArch64InstrInfo *AAII =
- static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
- if (!AAII)
- return false;
- SchedModel.init(ST.getSchedModel(), &ST, AAII);
- if (!SchedModel.hasInstrSchedModel())
- return false;
-
- // A simple check to exit this pass early for targets that do not need it.
- if (earlyExitVectElement(&MF))
- return false;
-
- bool Changed = false;
- std::map<unsigned, bool> VecInstElemTable;
- SmallVector<MachineInstr *, 8> RemoveMIs;
-
- for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
- MII != MIE;) {
- MachineInstr &MI = *MII;
- if (optimizeVectElement(MI, &VecInstElemTable)) {
- // Add MI to the list of instructions to be removed given that it has
- // been replaced.
- RemoveMIs.push_back(&MI);
- Changed = true;
- }
- ++MII;
- }
- }
-
- for (MachineInstr *MI : RemoveMIs)
- MI->eraseFromParent();
-
- return Changed;
-}
-
-/// createAArch64VectorByElementOptPass - returns an instance of the
-/// vector by element optimization pass.
-FunctionPass *llvm::createAArch64VectorByElementOptPass() {
- return new AArch64VectorByElementOpt();
-}
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index a79d51820545..aeffbd70fc81 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -59,12 +59,19 @@ using namespace llvm;
namespace {
+enum class RegKind {
+ Scalar,
+ NeonVector,
+ SVEDataVector,
+ SVEPredicateVector
+};
+
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registers via the .req directive.
- StringMap<std::pair<bool, unsigned>> RegisterReqs;
+ StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -77,7 +84,7 @@ private:
void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
- unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
+ unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind);
int tryParseRegister();
int tryMatchVectorRegister(StringRef &Kind, bool expected);
bool parseRegister(OperandVector &Operands);
@@ -114,6 +121,8 @@ private:
/// }
+ OperandMatchResultTy tryParseSVERegister(int &Reg, StringRef &Kind,
+ RegKind MatchKind);
OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands);
@@ -126,8 +135,11 @@ private:
OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands);
OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands);
- bool tryParseVectorRegister(OperandVector &Operands);
+ bool tryParseNeonVectorRegister(OperandVector &Operands);
OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands);
+ template <bool ParseSuffix>
+ OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands);
+ OperandMatchResultTy tryParseSVEPredicateVector(OperandVector &Operands);
public:
enum AArch64MatchResultTy {
@@ -139,7 +151,7 @@ public:
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, STI) {
+ : MCTargetAsmParser(Options, STI, MII) {
IsILP32 = Options.getABIName() == "ilp32";
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
@@ -194,7 +206,9 @@ private:
struct RegOp {
unsigned RegNum;
- bool isVector;
+ RegKind Kind;
+
+ int ElementWidth;
};
struct VectorListOp {
@@ -465,6 +479,15 @@ public:
int64_t Val = MCE->getValue();
return (Val >= -256 && Val < 256);
}
+ bool isSImm10s8() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= -4096 && Val < 4089 && (Val & 7) == 0);
+ }
bool isSImm7s4() const {
if (!isImm())
return false;
@@ -795,37 +818,76 @@ public:
return SysReg.PStateField != -1U;
}
- bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
- bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+ bool isReg() const override {
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar;
+ }
+
+ bool isNeonVectorReg() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector;
+ }
- bool isVectorRegLo() const {
- return Kind == k_Register && Reg.isVector &&
+ bool isNeonVectorRegLo() const {
+ return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
+ template <unsigned Class> bool isSVEVectorReg() const {
+ RegKind RK;
+ switch (Class) {
+ case AArch64::ZPRRegClassID:
+ RK = RegKind::SVEDataVector;
+ break;
+ case AArch64::PPRRegClassID:
+ RK = RegKind::SVEPredicateVector;
+ break;
+ default:
+ llvm_unreachable("Unsupported register class");
+ }
+
+ return (Kind == k_Register && Reg.Kind == RK) &&
+ AArch64MCRegisterClasses[Class].contains(getReg());
+ }
+
+ template <int ElementWidth, unsigned Class>
+ bool isSVEVectorRegOfWidth() const {
+ return isSVEVectorReg<Class>() &&
+ (ElementWidth == -1 || Reg.ElementWidth == ElementWidth);
+ }
+
bool isGPR32as64() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
bool isWSeqPair() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
bool isXSeqPair() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
bool isGPR64sp0() const {
- return Kind == k_Register && !Reg.isVector &&
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum);
}
+ template<int64_t Angle, int64_t Remainder>
+ bool isComplexRotation() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+
+ return (Value % Angle == Remainder && Value <= 270);
+ }
+
/// Is this a vector list with the type implicit (presumably attached to the
/// instruction itself)?
template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
@@ -1213,6 +1275,12 @@ public:
Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
+ void addSImm10s8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 8));
+ }
+
void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
@@ -1512,6 +1580,18 @@ public:
Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff));
}
+ void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 90));
+ }
+
+ void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm((MCE->getValue() - 90) / 180));
+ }
+
void print(raw_ostream &OS) const override;
static std::unique_ptr<AArch64Operand>
@@ -1526,10 +1606,22 @@ public:
}
static std::unique_ptr<AArch64Operand>
- CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ CreateReg(unsigned RegNum, RegKind Kind, SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
- Op->Reg.isVector = isVector;
+ Op->Reg.Kind = Kind;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, RegKind Kind, unsigned ElementWidth,
+ SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
+ Op->Reg.RegNum = RegNum;
+ Op->Reg.ElementWidth = ElementWidth;
+ Op->Reg.Kind = Kind;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1753,7 +1845,7 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
-static unsigned matchVectorRegName(StringRef Name) {
+static unsigned MatchNeonVectorRegName(StringRef Name) {
return StringSwitch<unsigned>(Name.lower())
.Case("v0", AArch64::Q0)
.Case("v1", AArch64::Q1)
@@ -1810,9 +1902,83 @@ static bool isValidVectorKind(StringRef Name) {
.Case(".d", true)
// Needed for fp16 scalar pairwise reductions
.Case(".2h", true)
+ // another special case for the ARMv8.2a dot product operand
+ .Case(".4b", true)
.Default(false);
}
+static unsigned matchSVEDataVectorRegName(StringRef Name) {
+ return StringSwitch<unsigned>(Name.lower())
+ .Case("z0", AArch64::Z0)
+ .Case("z1", AArch64::Z1)
+ .Case("z2", AArch64::Z2)
+ .Case("z3", AArch64::Z3)
+ .Case("z4", AArch64::Z4)
+ .Case("z5", AArch64::Z5)
+ .Case("z6", AArch64::Z6)
+ .Case("z7", AArch64::Z7)
+ .Case("z8", AArch64::Z8)
+ .Case("z9", AArch64::Z9)
+ .Case("z10", AArch64::Z10)
+ .Case("z11", AArch64::Z11)
+ .Case("z12", AArch64::Z12)
+ .Case("z13", AArch64::Z13)
+ .Case("z14", AArch64::Z14)
+ .Case("z15", AArch64::Z15)
+ .Case("z16", AArch64::Z16)
+ .Case("z17", AArch64::Z17)
+ .Case("z18", AArch64::Z18)
+ .Case("z19", AArch64::Z19)
+ .Case("z20", AArch64::Z20)
+ .Case("z21", AArch64::Z21)
+ .Case("z22", AArch64::Z22)
+ .Case("z23", AArch64::Z23)
+ .Case("z24", AArch64::Z24)
+ .Case("z25", AArch64::Z25)
+ .Case("z26", AArch64::Z26)
+ .Case("z27", AArch64::Z27)
+ .Case("z28", AArch64::Z28)
+ .Case("z29", AArch64::Z29)
+ .Case("z30", AArch64::Z30)
+ .Case("z31", AArch64::Z31)
+ .Default(0);
+}
+
+static unsigned matchSVEPredicateVectorRegName(StringRef Name) {
+ return StringSwitch<unsigned>(Name.lower())
+ .Case("p0", AArch64::P0)
+ .Case("p1", AArch64::P1)
+ .Case("p2", AArch64::P2)
+ .Case("p3", AArch64::P3)
+ .Case("p4", AArch64::P4)
+ .Case("p5", AArch64::P5)
+ .Case("p6", AArch64::P6)
+ .Case("p7", AArch64::P7)
+ .Case("p8", AArch64::P8)
+ .Case("p9", AArch64::P9)
+ .Case("p10", AArch64::P10)
+ .Case("p11", AArch64::P11)
+ .Case("p12", AArch64::P12)
+ .Case("p13", AArch64::P13)
+ .Case("p14", AArch64::P14)
+ .Case("p15", AArch64::P15)
+ .Default(0);
+}
+
+static bool isValidSVEKind(StringRef Name) {
+ return StringSwitch<bool>(Name.lower())
+ .Case(".b", true)
+ .Case(".h", true)
+ .Case(".s", true)
+ .Case(".d", true)
+ .Case(".q", true)
+ .Default(false);
+}
+
+static bool isSVERegister(StringRef Name) {
+ return Name[0] == 'z' || Name[0] == 'p';
+}
+
static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
char &ElementKind) {
assert(isValidVectorKind(Name));
@@ -1841,19 +2007,33 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
// Matches a register name or register alias previously defined by '.req'
unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
- bool isVector) {
- unsigned RegNum = isVector ? matchVectorRegName(Name)
- : MatchRegisterName(Name);
+ RegKind Kind) {
+ unsigned RegNum;
+ switch (Kind) {
+ case RegKind::Scalar:
+ RegNum = MatchRegisterName(Name);
+ break;
+ case RegKind::NeonVector:
+ RegNum = MatchNeonVectorRegName(Name);
+ break;
+ case RegKind::SVEDataVector:
+ RegNum = matchSVEDataVectorRegName(Name);
+ break;
+ case RegKind::SVEPredicateVector:
+ RegNum = matchSVEPredicateVectorRegName(Name);
+ break;
+ }
- if (RegNum == 0) {
+ if (!RegNum) {
// Check for aliases registered via .req. Canonicalize to lower case.
// That's more consistent since register names are case insensitive, and
// it's how the original entry was passed in from MC/MCParser/AsmParser.
auto Entry = RegisterReqs.find(Name.lower());
if (Entry == RegisterReqs.end())
return 0;
+
// set RegNum if the match is the right kind of register
- if (isVector == Entry->getValue().first)
+ if (Kind == Entry->getValue().first)
RegNum = Entry->getValue().second;
}
return RegNum;
@@ -1869,7 +2049,10 @@ int AArch64AsmParser::tryParseRegister() {
return -1;
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
+ if (isSVERegister(lowerCase))
+ return -1;
+
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, RegKind::Scalar);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1900,7 +2083,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchRegisterNameAlias(Head, true);
+ unsigned RegNum = matchRegisterNameAlias(Head, RegKind::NeonVector);
if (RegNum) {
if (Next != StringRef::npos) {
@@ -2519,8 +2702,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
return MatchOperand_Success;
}
-/// tryParseVectorRegister - Parse a vector register operand.
-bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+/// tryParseNeonVectorRegister - Parse a vector register operand.
+bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier))
return true;
@@ -2532,7 +2715,9 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
if (Reg == -1)
return true;
Operands.push_back(
- AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
+ AArch64Operand::CreateReg(Reg, RegKind::NeonVector, S, getLoc(),
+ getContext()));
+
// If there was an explicit qualifier, that goes on as a literal text
// operand.
if (!Kind.empty())
@@ -2563,19 +2748,85 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
return false;
}
+// tryParseSVERegister - Try to parse an SVE vector register name with an
+// optional kind specifier. If it is a register specifier, eat the token
+// and return it.
+OperandMatchResultTy
+AArch64AsmParser::tryParseSVERegister(int &Reg, StringRef &Kind,
+ RegKind MatchKind) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Name = Tok.getString();
+ // If there is a kind specifier, it's separated from the register name by
+ // a '.'.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+ unsigned RegNum = matchRegisterNameAlias(Head, MatchKind);
+
+ if (RegNum) {
+ if (Next != StringRef::npos) {
+ Kind = Name.slice(Next, StringRef::npos);
+ if (!isValidSVEKind(Kind)) {
+ TokError("invalid sve vector kind qualifier");
+ return MatchOperand_ParseFail;
+ }
+ }
+ Parser.Lex(); // Eat the register token.
+
+ Reg = RegNum;
+ return MatchOperand_Success;
+ }
+
+ return MatchOperand_NoMatch;
+}
+
+/// tryParseSVEPredicateVector - Parse an SVE predicate register operand.
+OperandMatchResultTy
+AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
+  // Check for an SVE predicate register specifier first.
+  const SMLoc S = getLoc();
+ StringRef Kind;
+ int RegNum = -1;
+ auto Res = tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ unsigned ElementWidth = StringSwitch<unsigned>(Kind.lower())
+ .Case("", -1)
+ .Case(".b", 8)
+ .Case(".h", 16)
+ .Case(".s", 32)
+ .Case(".d", 64)
+ .Case(".q", 128)
+ .Default(0);
+
+ if (!ElementWidth)
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(
+ AArch64Operand::CreateReg(RegNum, RegKind::SVEPredicateVector,
+ ElementWidth, S, getLoc(), getContext()));
+
+ return MatchOperand_Success;
+}
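Both SVE operand parsers map the optional kind suffix onto an element width with the same StringSwitch. A standalone sketch of that mapping (a hypothetical helper, not part of the patch; the name is illustrative) makes the convention explicit: the empty suffix deliberately wraps -1 to ~0u so it survives the !ElementWidth check, while an unknown suffix yields 0 and is rejected.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Hypothetical helper mirroring the suffix-to-width switch used above.
static unsigned sveSuffixToElementWidth(StringRef Kind) {
  return StringSwitch<unsigned>(Kind.lower())
      .Case("", -1)   // no suffix: any element width is acceptable
      .Case(".b", 8)
      .Case(".h", 16)
      .Case(".s", 32)
      .Case(".d", 64)
      .Case(".q", 128)
      .Default(0);    // anything else is an invalid kind and gets rejected
}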
+
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
SMLoc S = getLoc();
- // Try for a vector register.
- if (!tryParseVectorRegister(Operands))
+ // Try for a vector (neon) register.
+ if (!tryParseNeonVectorRegister(Operands))
return false;
// Try for a scalar register.
int64_t Reg = tryParseRegister();
if (Reg == -1)
return true;
- Operands.push_back(
- AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
+ Operands.push_back(AArch64Operand::CreateReg(Reg, RegKind::Scalar, S,
+ getLoc(), getContext()));
return false;
}
@@ -2743,7 +2994,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), RegKind::Scalar);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -2755,7 +3006,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!parseOptionalToken(AsmToken::Comma)) {
Operands.push_back(
- AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx));
return MatchOperand_Success;
}
@@ -2774,7 +3025,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
}
Operands.push_back(
- AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ AArch64Operand::CreateReg(RegNum, RegKind::Scalar, S, getLoc(), Ctx));
return MatchOperand_Success;
}
@@ -2783,9 +3034,12 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode) {
MCAsmParser &Parser = getParser();
+
+ OperandMatchResultTy ResTy =
+ MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/ true);
+
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
if (ResTy == MatchOperand_Success)
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
@@ -3257,7 +3511,8 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst,
}
}
-std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS);
+static std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
OperandVector &Operands) {
@@ -3297,6 +3552,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
"expected compatible register or floating-point constant");
case Match_InvalidMemoryIndexedSImm9:
return Error(Loc, "index must be an integer in range [-256, 255].");
+ case Match_InvalidMemoryIndexedSImm10:
+ return Error(Loc, "index must be a multiple of 8 in range [-4096, 4088].");
case Match_InvalidMemoryIndexed4SImm7:
return Error(Loc, "index must be a multiple of 4 in range [-256, 252].");
case Match_InvalidMemoryIndexed8SImm7:
@@ -3383,12 +3640,22 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "expected readable system register");
case Match_MSR:
return Error(Loc, "expected writable system register or pstate");
+ case Match_InvalidComplexRotationEven:
+ return Error(Loc, "complex rotation must be 0, 90, 180 or 270.");
+ case Match_InvalidComplexRotationOdd:
+ return Error(Loc, "complex rotation must be 90 or 270.");
case Match_MnemonicFail: {
std::string Suggestion = AArch64MnemonicSpellCheck(
((AArch64Operand &)*Operands[0]).getToken(),
ComputeAvailableFeatures(STI->getFeatureBits()));
return Error(Loc, "unrecognized instruction mnemonic" + Suggestion);
}
+ case Match_InvalidSVEPredicateAnyReg:
+ case Match_InvalidSVEPredicateBReg:
+ case Match_InvalidSVEPredicateHReg:
+ case Match_InvalidSVEPredicateSReg:
+ case Match_InvalidSVEPredicateDReg:
+ return Error(Loc, "invalid predicate register.");
default:
llvm_unreachable("unexpected error code!");
}
@@ -3482,8 +3749,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Operands[0] = AArch64Operand::CreateToken(
"bfm", false, Op.getStartLoc(), getContext());
Operands[2] = AArch64Operand::CreateReg(
- RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(),
- SMLoc(), getContext());
+ RegWidth == 32 ? AArch64::WZR : AArch64::XZR, RegKind::Scalar,
+ SMLoc(), SMLoc(), getContext());
Operands[3] = AArch64Operand::CreateImm(
ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext());
Operands.emplace_back(
@@ -3610,6 +3877,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
}
+
+ // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing
+ // instruction for FP registers correctly in some rare circumstances. Convert
+ // it to a safe instruction and warn (because silently changing someone's
+ // assembly is rude).
+ if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+ NumOperands == 4 && Tok == "movi") {
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) ||
+ (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) {
+ StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken();
+ if (Suffix.lower() == ".2d" &&
+ cast<MCConstantExpr>(Op3.getImm())->getValue() == 0) {
+ Warning(IDLoc, "instruction movi.2d with immediate #0 may not function"
+ " correctly on this CPU, converting to equivalent movi.16b");
+ // Switch the suffix to .16b.
+ unsigned Idx = Op1.isToken() ? 1 : 2;
+ Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc,
+ getContext());
+ }
+ }
+ }
+
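The warning above is easiest to read with a concrete before/after; the assembly below is an illustration of the rewrite, not text taken from the patch.

// With FeatureZCZeroingFPWorkaround enabled, the parser swaps the suffix
// token, so for example
//   movi v0.2d, #0     // as written by the user
// is assembled as
//   movi v0.16b, #0    // what is emitted instead, after the warning
// Both forms zero all 128 bits of v0, so only the encoding's element layout
// changes.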
// FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
// InstAlias can't quite handle this since the reg classes aren't
// subclasses.
@@ -3619,8 +3911,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
- Op.getEndLoc(), getContext());
+ Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(), Op.getEndLoc(),
+ getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
@@ -3634,7 +3927,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(),
Op.getEndLoc(), getContext());
}
}
@@ -3650,7 +3944,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
if (Op.isReg()) {
unsigned Reg = getWRegFromXReg(Op.getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
+ Op.getStartLoc(),
Op.getEndLoc(), getContext());
}
}
@@ -3764,6 +4059,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed8SImm7:
case Match_InvalidMemoryIndexed16SImm7:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidMemoryIndexedSImm10:
case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
case Match_InvalidImm0_15:
@@ -3782,6 +4078,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidIndexS:
case Match_InvalidIndexD:
case Match_InvalidLabel:
+ case Match_InvalidComplexRotationEven:
+ case Match_InvalidComplexRotationOdd:
+ case Match_InvalidSVEPredicateAnyReg:
+ case Match_InvalidSVEPredicateBReg:
+ case Match_InvalidSVEPredicateHReg:
+ case Match_InvalidSVEPredicateSReg:
+ case Match_InvalidSVEPredicateDReg:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())
@@ -3862,8 +4165,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
std::tie(Arch, ExtensionString) =
getParser().parseStringToEndOfStatement().trim().split('+');
- unsigned ID = AArch64::parseArch(Arch);
- if (ID == static_cast<unsigned>(AArch64::ArchKind::AK_INVALID))
+ AArch64::ArchKind ID = AArch64::parseArch(Arch);
+ if (ID == AArch64::ArchKind::INVALID)
return Error(ArchLoc, "unknown arch name");
if (parseToken(AsmToken::EndOfStatement))
@@ -4107,18 +4410,46 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
- unsigned RegNum = tryParseRegister();
- bool IsVector = false;
+ int RegNum = tryParseRegister();
+ RegKind RegisterKind = RegKind::Scalar;
- if (RegNum == static_cast<unsigned>(-1)) {
+ if (RegNum == -1) {
StringRef Kind;
+ RegisterKind = RegKind::NeonVector;
RegNum = tryMatchVectorRegister(Kind, false);
if (!Kind.empty())
return Error(SRegLoc, "vector register without type specifier expected");
- IsVector = true;
}
- if (RegNum == static_cast<unsigned>(-1))
+ if (RegNum == -1) {
+ StringRef Kind;
+ RegisterKind = RegKind::SVEDataVector;
+ OperandMatchResultTy Res =
+ tryParseSVERegister(RegNum, Kind, RegKind::SVEDataVector);
+
+ if (Res == MatchOperand_ParseFail)
+ return true;
+
+ if (Res == MatchOperand_Success && !Kind.empty())
+ return Error(SRegLoc,
+ "sve vector register without type specifier expected");
+ }
+
+ if (RegNum == -1) {
+ StringRef Kind;
+ RegisterKind = RegKind::SVEPredicateVector;
+ OperandMatchResultTy Res =
+ tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector);
+
+ if (Res == MatchOperand_ParseFail)
+ return true;
+
+ if (Res == MatchOperand_Success && !Kind.empty())
+ return Error(SRegLoc,
+ "sve predicate register without type specifier expected");
+ }
+
+ if (RegNum == -1)
return Error(SRegLoc, "register name or alias expected");
// Shouldn't be anything else.
@@ -4126,7 +4457,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
"unexpected input in .req directive"))
return true;
- auto pair = std::make_pair(IsVector, RegNum);
+ auto pair = std::make_pair(RegisterKind, (unsigned) RegNum);
if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
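A brief usage sketch of the extended .req handling (illustrative assembly, an assumption about accepted syntax rather than text from the patch): an alias now records its register kind, so it can only stand in where that kind of register is expected.

//   ptmp .req p1     // recorded as RegKind::SVEPredicateVector
//   ztmp .req z5     // recorded as RegKind::SVEDataVector
//   wtmp .req w3     // recorded as RegKind::Scalar
// Writing "ztmp .req z5.s" is rejected with the new "without type specifier
// expected" error, since a .req operand may not carry a kind qualifier.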
@@ -4206,6 +4537,7 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "AArch64GenAsmMatcher.inc"
// Define this matcher function after the auto-generated include so we
@@ -4337,8 +4669,43 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
&AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]);
}
- Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(),
- getContext()));
+ Operands.push_back(AArch64Operand::CreateReg(Pair, RegKind::Scalar, S,
+ getLoc(), getContext()));
+
+ return MatchOperand_Success;
+}
+
+template <bool ParseSuffix>
+OperandMatchResultTy
+AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
+ const SMLoc S = getLoc();
+  // Check for an SVE vector register specifier first.
+ int RegNum = -1;
+ StringRef Kind;
+
+ OperandMatchResultTy Res =
+ tryParseSVERegister(RegNum, Kind, RegKind::SVEDataVector);
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (ParseSuffix && Kind.empty())
+ return MatchOperand_NoMatch;
+
+ unsigned ElementWidth = StringSwitch<unsigned>(Kind.lower())
+ .Case("", -1)
+ .Case(".b", 8)
+ .Case(".h", 16)
+ .Case(".s", 32)
+ .Case(".d", 64)
+ .Case(".q", 128)
+ .Default(0);
+ if (!ElementWidth)
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(
+ AArch64Operand::CreateReg(RegNum, RegKind::SVEDataVector, ElementWidth,
+ S, S, getContext()));
return MatchOperand_Success;
}
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 7870dce5c9c0..ae278caeda69 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- AArch64Disassembler.cpp - Disassembler for AArch64 -------*- C++ -*-===//
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,160 +14,168 @@
#include "AArch64ExternalSymbolizer.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
#define DEBUG_TYPE "aarch64-disassembler"
// Pull DecodeStatus and its enum values into the global namespace.
-typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+using DecodeStatus = MCDisassembler::DecodeStatus;
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                           uint64_t Address,
+                                           const void *Decoder);
+static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                           uint64_t Address,
+                                           const void *Decoder);
-static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
+static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
+static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
+static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
+static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder);
-static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder);
-static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder);
-static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
unsigned RegNo,
@@ -177,6 +185,9 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Addr,
const void *Decoder);
+template<int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -196,9 +207,9 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
#include "AArch64GenDisassemblerTables.inc"
#include "AArch64GenInstrInfo.inc"
-#define Success llvm::MCDisassembler::Success
-#define Fail llvm::MCDisassembler::Fail
-#define SoftFail llvm::MCDisassembler::SoftFail
+#define Success MCDisassembler::Success
+#define Fail MCDisassembler::Fail
+#define SoftFail MCDisassembler::SoftFail
static MCDisassembler *createAArch64Disassembler(const Target &T,
const MCSubtargetInfo &STI,
@@ -232,8 +243,8 @@ createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo) {
- return new llvm::AArch64ExternalSymbolizer(*Ctx, move(RelInfo), GetOpInfo,
- SymbolLookUp, DisInfo);
+ return new AArch64ExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
+ SymbolLookUp, DisInfo);
}
extern "C" void LLVMInitializeAArch64Disassembler() {
@@ -431,6 +442,44 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
+static const unsigned ZPRDecoderTable[] = {
+ AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
+ AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7,
+ AArch64::Z8, AArch64::Z9, AArch64::Z10, AArch64::Z11,
+ AArch64::Z12, AArch64::Z13, AArch64::Z14, AArch64::Z15,
+ AArch64::Z16, AArch64::Z17, AArch64::Z18, AArch64::Z19,
+ AArch64::Z20, AArch64::Z21, AArch64::Z22, AArch64::Z23,
+ AArch64::Z24, AArch64::Z25, AArch64::Z26, AArch64::Z27,
+ AArch64::Z28, AArch64::Z29, AArch64::Z30, AArch64::Z31
+};
+
+static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+                                           const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+
+ unsigned Register = ZPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
+static const unsigned PPRDecoderTable[] = {
+ AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
+ AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
+ AArch64::P8, AArch64::P9, AArch64::P10, AArch64::P11,
+ AArch64::P12, AArch64::P13, AArch64::P14, AArch64::P15
+};
+
+static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr, const void *Decoder) {
+ if (RegNo > 15)
+ return Fail;
+
+ unsigned Register = PPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
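Both new decoders follow the existing register-class decoder pattern: bounds-check the raw field against the table, then translate it into a register operand. A generic sketch of that pattern (hypothetical, not part of the patch; it relies on the MCInst/MCOperand declarations and the Success/Fail macros already present in this file):

// Hypothetical generic form of the ZPR/PPR decoders above.
template <size_t N>
static DecodeStatus decodeFromRegTable(MCInst &Inst, unsigned RegNo,
                                       const unsigned (&Table)[N]) {
  if (RegNo >= N)
    return Fail;     // encoding selects a register outside the class
  Inst.addOperand(MCOperand::createReg(Table[RegNo]));
  return Success;
}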
static const unsigned VectorDecoderTable[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
@@ -587,7 +636,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
// scale{5} is asserted as 1 in tblgen.
@@ -596,14 +645,14 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
return Success;
}
-static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
-static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
int64_t ImmVal = Imm;
const AArch64Disassembler *Dis =
@@ -619,14 +668,14 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
return Success;
}
-static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1));
Inst.addOperand(MCOperand::createImm(Imm & 1));
return Success;
}
-static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
Inst.addOperand(MCOperand::createImm(Imm));
@@ -636,7 +685,7 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
return Success;
}
-static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
Inst.addOperand(MCOperand::createImm(Imm));
@@ -644,7 +693,7 @@ static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
return Success;
}
-static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder) {
// This decoder exists to add the dummy Lane operand to the MCInst, which must
@@ -667,78 +716,78 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
return Success;
}
-static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftRImm(MCInst &Inst, unsigned Imm,
unsigned Add) {
Inst.addOperand(MCOperand::createImm(Add - Imm));
return Success;
}
-static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftLImm(MCInst &Inst, unsigned Imm,
unsigned Add) {
Inst.addOperand(MCOperand::createImm((Imm + Add) & (Add - 1)));
return Success;
}
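The two helpers above encode the usual AdvSIMD shift-immediate conventions: right shifts decode as Add - Imm, and left shifts reduce the field modulo Add (Add is always a power of two). A couple of compile-time spot checks, illustrative only and not part of the patch:

// Spot checks for Add == 64 (self-contained, plain integer arithmetic).
static_assert(64 - 1 == 63, "VecShiftR: raw field 1 decodes to a shift of 63");
static_assert(((63 + 64) & (64 - 1)) == 63, "VecShiftL: raw field 63 stays 63");
static_assert(((64 + 64) & (64 - 1)) == 0, "VecShiftL: raw field 64 wraps to 0");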
-static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 64);
}
-static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x20, 64);
}
-static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 32);
}
-static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x10, 32);
}
-static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 16);
}
-static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x8, 16);
}
-static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 8);
}
-static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 64);
}
-static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 32);
}
-static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 16);
}
-static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 8);
}
-static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -799,7 +848,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
@@ -832,8 +881,8 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -893,8 +942,8 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1078,8 +1127,8 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1161,7 +1210,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
@@ -1290,8 +1339,8 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1347,8 +1396,8 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1378,7 +1427,7 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
@@ -1417,8 +1466,8 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned cmode = fieldFromInstruction(insn, 12, 4);
@@ -1435,7 +1484,7 @@ static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr, const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
@@ -1454,7 +1503,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
uint64_t Addr, const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1490,7 +1539,7 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
int64_t imm = fieldFromInstruction(insn, 0, 26);
@@ -1507,8 +1556,8 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
+static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
const void *Decoder) {
uint64_t op1 = fieldFromInstruction(insn, 16, 3);
uint64_t op2 = fieldFromInstruction(insn, 5, 3);
@@ -1531,7 +1580,7 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
return Fail;
}
-static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
uint64_t Addr, const void *Decoder) {
uint64_t Rt = fieldFromInstruction(insn, 0, 5);
uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
@@ -1586,3 +1635,18 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
AArch64::XSeqPairsClassRegClassID,
RegNo, Addr, Decoder);
}
+
+template<int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ if (Imm & ~((1LL << Bits) - 1))
+ return Fail;
+
+ // Imm is a signed immediate, so sign extend it.
+ if (Imm & (1 << (Bits - 1)))
+ Imm |= ~((1LL << Bits) - 1);
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return Success;
+}
+
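DecodeSImm<Bits> rejects any raw field with bits set at or above position Bits, then sign-extends what remains. A standalone model of that step (illustration only, not part of the patch), using the values a 10-bit field can take:

#include <cstdint>

// Standalone model of the sign extension performed by DecodeSImm above.
static int64_t signExtendField(uint64_t Imm, int Bits) {
  if (Imm & (1ULL << (Bits - 1)))
    Imm |= ~((1ULL << Bits) - 1);   // propagate the sign bit upwards
  return static_cast<int64_t>(Imm);
}
// signExtendField(0x3FF, 10) == -1 and signExtendField(0x1FF, 10) == 511,
// which is how the new simm10 offsets round-trip through the disassembler.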
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index fc89657bffd3..bdf71b095fda 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -689,7 +689,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot,
const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
- StringRef Layout, Mnemonic;
+ StringRef Layout;
bool IsTbx;
if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
@@ -1331,3 +1331,32 @@ void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
O << format("#%#016llx", Val);
}
+
+template<int64_t Angle, int64_t Remainder>
+void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ O << "#" << (Val * Angle) + Remainder;
+}
+
+template <char suffix>
+void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ switch (suffix) {
+ case 0:
+ case 'b':
+ case 'h':
+ case 's':
+ case 'd':
+ case 'q':
+ break;
+ default: llvm_unreachable("Invalid kind specifier.");
+ }
+
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ O << getRegisterName(Reg);
+ if (suffix != 0)
+ O << '.' << suffix;
+}
\ No newline at end of file
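A short usage note for the new printer method; the calls below are hypothetical instantiations for illustration, the real ones come from the generated instruction printer.

// For a Z register operand, say z3:
//   printSVERegOp<>(MI, OpNum, STI, OS);     // prints "z3"
//   printSVERegOp<'h'>(MI, OpNum, STI, OS);  // prints "z3.h"
// Any suffix outside {0, 'b', 'h', 's', 'd', 'q'} hits the llvm_unreachable
// guard at the top of the function.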
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index a45258cb97b7..76f20f042cef 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -158,10 +158,16 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template<int64_t Angle, int64_t Remainder>
+ void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template<unsigned size>
void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O);
+ template <char = 0>
+ void printSVERegOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 2bd0cbf9f7c6..7b33b4b5b542 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -30,12 +30,14 @@ namespace {
class AArch64AsmBackend : public MCAsmBackend {
static const unsigned PCRelFlagVal =
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
+ Triple TheTriple;
+
public:
bool IsLittleEndian;
public:
- AArch64AsmBackend(const Target &T, bool IsLittleEndian)
- : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
+ AArch64AsmBackend(const Target &T, const Triple &TT, bool IsLittleEndian)
+ : MCAsmBackend(), TheTriple(TT), IsLittleEndian(IsLittleEndian) {}
unsigned getNumFixupKinds() const override {
return AArch64::NumTargetFixupKinds;
@@ -88,6 +90,9 @@ public:
unsigned getPointerSize() const { return 8; }
unsigned getFixupKindContainereSizeInBytes(unsigned Kind) const;
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
};
} // end anonymous namespace
@@ -140,7 +145,8 @@ static unsigned AdrImmBits(unsigned Value) {
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext &Ctx) {
+ MCContext &Ctx, const Triple &TheTriple,
+ bool IsResolved) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
@@ -151,6 +157,9 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
+ assert(!IsResolved);
+ if (TheTriple.isOSBinFormatCOFF())
+ return AdrImmBits(Value & 0x1fffffULL);
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
case AArch64::fixup_aarch64_pcrel_branch19:
@@ -163,11 +172,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return (Value >> 2) & 0x7ffff;
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
+ Value &= 0xfff;
// Unsigned 12-bit immediate
if (Value >= 0x1000)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return Value;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
+ Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 2
if (Value >= 0x2000)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
@@ -175,6 +188,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx.reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
return Value >> 1;
case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
+ Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 4
if (Value >= 0x4000)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
@@ -182,6 +197,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx.reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
return Value >> 2;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
+ Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 8
if (Value >= 0x8000)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
@@ -189,6 +206,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx.reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
return Value >> 3;
case AArch64::fixup_aarch64_ldst_imm12_scale16:
+ if (TheTriple.isOSBinFormatCOFF() && !IsResolved)
+ Value &= 0xfff;
// Unsigned 12-bit immediate which gets multiplied by 16
if (Value >= 0x10000)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
@@ -275,7 +294,7 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, Ctx);
+ Value = adjustFixupValue(Fixup, Value, Ctx, TheTriple, IsResolved);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -338,6 +357,26 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
+bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
+ return true;
+ return false;
+}
+
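The 0xffc example in the comment can be made concrete with a small helper (illustrative, not part of the patch) that computes the page delta an ADRP immediate has to encode:

#include <cstdint>

// Illustrative page-delta computation for the ADRP example above.
static int64_t adrpPageDelta(uint64_t PC, uint64_t Target) {
  return (int64_t)((Target & ~0xfffULL) - (PC & ~0xfffULL)) >> 12;
}
// adrpPageDelta(0xffc, 0x1000) == 1, yet adrpPageDelta(0x1000, 0x1004) == 0:
// the same "adrp x0, there" needs different immediates depending on where it
// lands, so the fixup is always handed to the linker as a relocation.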
namespace {
namespace CU {
@@ -389,10 +428,12 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
}
public:
- DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
- : AArch64AsmBackend(T, /*IsLittleEndian*/true), MRI(MRI) {}
+ DarwinAArch64AsmBackend(const Target &T, const Triple &TT,
+ const MCRegisterInfo &MRI)
+ : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI) {}
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ std::unique_ptr<MCObjectWriter>
+ createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
@@ -537,47 +578,27 @@ public:
uint8_t OSABI;
bool IsILP32;
- ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian,
- bool IsILP32)
- : AArch64AsmBackend(T, IsLittleEndian), OSABI(OSABI), IsILP32(IsILP32) {}
+ ELFAArch64AsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
+ bool IsLittleEndian, bool IsILP32)
+ : AArch64AsmBackend(T, TT, IsLittleEndian), OSABI(OSABI),
+ IsILP32(IsILP32) {}
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ std::unique_ptr<MCObjectWriter>
+ createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
-
- bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
};
-bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target) {
- // The ADRP instruction adds some multiple of 0x1000 to the current PC &
- // ~0xfff. This means that the required offset to reach a symbol can vary by
- // up to one step depending on where the ADRP is in memory. For example:
- //
- // ADRP x0, there
- // there:
- //
- // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
- // we'll need that as an offset. At any other address "there" will be in the
- // same page as the ADRP and the instruction should encode 0x0. Assuming the
- // section isn't 0x1000-aligned, we therefore need to delegate this decision
- // to the linker -- a relocation!
- if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
- return true;
- return false;
-}
-
}
namespace {
class COFFAArch64AsmBackend : public AArch64AsmBackend {
public:
COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple)
- : AArch64AsmBackend(T, /*IsLittleEndian*/true) {}
+ : AArch64AsmBackend(T, TheTriple, /*IsLittleEndian*/ true) {}
- MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ std::unique_ptr<MCObjectWriter>
+ createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64WinCOFFObjectWriter(OS);
}
};
@@ -589,7 +610,7 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
StringRef CPU,
const MCTargetOptions &Options) {
if (TheTriple.isOSBinFormatMachO())
- return new DarwinAArch64AsmBackend(T, MRI);
+ return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
if (TheTriple.isOSBinFormatCOFF())
return new COFFAArch64AsmBackend(T, TheTriple);
@@ -598,7 +619,8 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
bool IsILP32 = Options.getABIName() == "ilp32";
- return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32);
+ return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/true,
+ IsILP32);
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
@@ -610,5 +632,6 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
bool IsILP32 = Options.getABIName() == "ilp32";
- return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/false, IsILP32);
+ return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/false,
+ IsILP32);
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 89c3e5b4c76e..2d90e67960f8 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -428,11 +429,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
llvm_unreachable("Unimplemented fixup -> relocation");
}
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
- uint8_t OSABI,
- bool IsLittleEndian,
- bool IsILP32) {
- MCELFObjectTargetWriter *MOTW =
- new AArch64ELFObjectWriter(OSABI, IsLittleEndian, IsILP32);
- return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+std::unique_ptr<MCObjectWriter>
+llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+ bool IsLittleEndian, bool IsILP32) {
+ auto MOTW =
+ llvm::make_unique<AArch64ELFObjectWriter>(OSABI, IsLittleEndian, IsILP32);
+ return createELFObjectWriter(std::move(MOTW), OS, IsLittleEndian);
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index a0de3c39562b..8ee627d50df2 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -86,10 +86,11 @@ class AArch64ELFStreamer : public MCELFStreamer {
public:
friend class AArch64TargetELFStreamer;
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
- : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
- LastEMS(EMS_None) {}
+ AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+ raw_pwrite_stream &OS,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {}
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
@@ -101,6 +102,14 @@ public:
MCELFStreamer::ChangeSection(Section, Subsection);
}
+ // Reset state between object emissions
+ void reset() override {
+ MappingSymbolCounter = 0;
+ MCELFStreamer::reset();
+ LastMappingSymbols.clear();
+ LastEMS = EMS_None;
+ }
+
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
@@ -198,10 +207,13 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
return new AArch64TargetAsmStreamer(S, OS);
}
-MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+MCELFStreamer *createAArch64ELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> TAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll) {
- AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ std::unique_ptr<MCCodeEmitter> Emitter,
+ bool RelaxAll) {
+ AArch64ELFStreamer *S =
+ new AArch64ELFStreamer(Context, std::move(TAB), OS, std::move(Emitter));
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index ef48203c8bc0..19b188aa1c61 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -18,9 +18,11 @@
namespace llvm {
-MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+MCELFStreamer *createAArch64ELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> TAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll);
+ std::unique_ptr<MCCodeEmitter> Emitter,
+ bool RelaxAll);
}
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index c25bd8c8f6cc..12b5a27b7699 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -102,7 +102,24 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
}
AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
- CommentString = ";";
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ AlignmentIsInBytes = false;
+ SupportsDebugInformation = true;
+ CodePointerSize = 8;
+}
+
+AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::WinEH;
+}
+
+AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
+ CommentString = "//";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 2d7107a37244..afde87b40929 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -38,6 +38,14 @@ struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF {
explicit AArch64MCAsmInfoCOFF();
};
+struct AArch64MCAsmInfoMicrosoftCOFF : public AArch64MCAsmInfoCOFF {
+ explicit AArch64MCAsmInfoMicrosoftCOFF();
+};
+
+struct AArch64MCAsmInfoGNUCOFF : public AArch64MCAsmInfoCOFF {
+ explicit AArch64MCAsmInfoGNUCOFF();
+};
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 97c92fa0778d..f606d272bcb0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64MCExpr.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index a2555496cdb9..c3458d625b83 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -16,6 +16,8 @@
#include "AArch64MCAsmInfo.h"
#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -49,9 +51,18 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
return createAArch64MCSubtargetInfoImpl(TT, CPU, FS);
}
+void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
+ for (unsigned Reg = AArch64::NoRegister + 1;
+ Reg < AArch64::NUM_TARGET_REGS; ++Reg) {
+ unsigned CV = MRI->getEncodingValue(Reg);
+ MRI->mapLLVMRegToCVReg(Reg, CV);
+ }
+}
+
static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAArch64MCRegisterInfo(X, AArch64::LR);
+ AArch64_MC::initLLVMToCVRegMapping(X);
return X;
}
@@ -60,8 +71,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin();
+ else if (TheTriple.isWindowsMSVCEnvironment())
+ MAI = new AArch64MCAsmInfoMicrosoftCOFF();
else if (TheTriple.isOSBinFormatCOFF())
- MAI = new AArch64MCAsmInfoCOFF();
+ MAI = new AArch64MCAsmInfoGNUCOFF();
else {
assert(TheTriple.isOSBinFormatELF() && "Invalid target");
MAI = new AArch64MCAsmInfoELF(TheTriple);
@@ -75,28 +88,6 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
- CodeModel::Model &CM) {
- assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
- TT.isOSBinFormatCOFF()) && "Invalid target");
-
- if (CM == CodeModel::Default)
- CM = CodeModel::Small;
- // The default MCJIT memory managers make no guarantees about where they can
- // find an executable page; JITed code needs to be able to refer to globals
- // no matter how far away they are.
- else if (CM == CodeModel::JITDefault)
- CM = CodeModel::Large;
- else if (CM != CodeModel::Small && CM != CodeModel::Large) {
- if (!TT.isOSFuchsia())
- report_fatal_error(
- "Only small and large code models are allowed on AArch64");
- else if (CM != CodeModel::Kernel)
- report_fatal_error(
- "Only small, kernel, and large code models are allowed on AArch64");
- }
-}
-
static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -111,25 +102,32 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
}
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &TAB, raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll) {
- return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ raw_pwrite_stream &OS,
+ std::unique_ptr<MCCodeEmitter> &&Emitter,
+ bool RelaxAll) {
+ return createAArch64ELFStreamer(Ctx, std::move(TAB), OS, std::move(Emitter),
+ RelaxAll);
}
-static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+static MCStreamer *createMachOStreamer(MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll,
+ std::unique_ptr<MCCodeEmitter> &&Emitter,
+ bool RelaxAll,
bool DWARFMustBeAtTheEnd) {
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- DWARFMustBeAtTheEnd,
+ return createMachOStreamer(Ctx, std::move(TAB), OS, std::move(Emitter),
+ RelaxAll, DWARFMustBeAtTheEnd,
/*LabelSections*/ true);
}
-static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll,
- bool IncrementalLinkerCompatible) {
- return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+static MCStreamer *
+createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
+ raw_pwrite_stream &OS,
+ std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), OS,
+ std::move(Emitter), RelaxAll,
IncrementalLinkerCompatible);
}
@@ -144,9 +142,6 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
- // Register the MC codegen info.
- TargetRegistry::registerMCAdjustCodeGenOpts(*T, adjustCodeGenOpts);
-
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1404926b8124..b9e1673b9317 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -16,6 +16,8 @@
#include "llvm/Support/DataTypes.h"
+#include <memory>
+
namespace llvm {
class formatted_raw_ostream;
class MCAsmBackend;
@@ -51,16 +53,16 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
- uint8_t OSABI,
- bool IsLittleEndian,
- bool IsILP32);
+std::unique_ptr<MCObjectWriter>
+createAArch64ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+ bool IsLittleEndian, bool IsILP32);
-MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype);
+std::unique_ptr<MCObjectWriter>
+createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType,
+ uint32_t CPUSubtype);
-MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS);
+std::unique_ptr<MCObjectWriter>
+createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS);
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
@@ -70,6 +72,10 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
+namespace AArch64_MC {
+void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
+}
+
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 19b2576f6895..55151c2b8d21 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -430,10 +430,10 @@ void AArch64MachObjectWriter::recordRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype) {
+std::unique_ptr<MCObjectWriter>
+llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType,
+ uint32_t CPUSubtype) {
return createMachObjectWriter(
- new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
+ llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype), OS,
/*IsLittleEndian=*/true);
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index 31762b9e4cd5..d06c5e8862ae 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/ErrorHandling.h"
@@ -96,9 +97,10 @@ bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
namespace llvm {
-MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) {
- MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter();
- return createWinCOFFObjectWriter(MOTW, OS);
+std::unique_ptr<MCObjectWriter>
+createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) {
+ auto MOTW = llvm::make_unique<AArch64WinCOFFObjectWriter>();
+ return createWinCOFFObjectWriter(std::move(MOTW), OS);
}
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 6c8da27e398f..c88363d2c250 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64WinCOFFStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
using namespace llvm;
@@ -17,19 +19,28 @@ class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
public:
friend class AArch64TargetWinCOFFStreamer;
- AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
- raw_pwrite_stream &OS)
- : MCWinCOFFStreamer(C, AB, CE, OS) {}
+ AArch64WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
+ std::unique_ptr<MCCodeEmitter> CE,
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {}
+
+ void FinishImpl() override;
};
+
+void AArch64WinCOFFStreamer::FinishImpl() {
+ EmitFrames(nullptr);
+
+ MCWinCOFFStreamer::FinishImpl();
+}
} // end anonymous namespace
namespace llvm {
-MCWinCOFFStreamer
-*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll,
- bool IncrementalLinkerCompatible) {
- auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS);
+MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
+ MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter,
+ bool RelaxAll, bool IncrementalLinkerCompatible) {
+ auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB),
+ std::move(Emitter), OS);
S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
return S;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index 1b4fcd6804e2..b67a19e883e9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -33,11 +33,10 @@ public:
namespace llvm {
-MCWinCOFFStreamer
-*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll,
- bool IncrementalLinkerCompatible);
+MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
+ MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+ raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter,
+ bool RelaxAll, bool IncrementalLinkerCompatible);
} // end llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
new file mode 100644
index 000000000000..15c1275f259d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -0,0 +1,103 @@
+//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Arithmetic - Unpredicated Group.
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> {
+ def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - In Lane Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
+ def _B : sve_int_perm_bin_perm_zz<opc, 0b00, asm, ZPR8>;
+ def _H : sve_int_perm_bin_perm_zz<opc, 0b01, asm, ZPR16>;
+ def _S : sve_int_perm_bin_perm_zz<opc, 0b10, asm, ZPR32>;
+ def _D : sve_int_perm_bin_perm_zz<opc, 0b11, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - Predicates Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
+ asm, "\t$Pd, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = Pm;
+ let Inst{15-13} = 0b010;
+ let Inst{12-10} = opc;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pd;
+}
+
+multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm> {
+ def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>;
+ def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>;
+ def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>;
+ def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>;
+}
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 7ac9a5a08484..8fb161574c5b 100644
--- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -29,11 +29,11 @@ extern "C" void LLVMInitializeAArch64TargetInfo() {
// Now register the "arm64" name for use with "-march". We don't want it to
// take possession of the Triple::aarch64 tag though.
TargetRegistry::RegisterTarget(getTheARM64Target(), "arm64",
- "ARM64 (little endian)",
+ "ARM64 (little endian)", "AArch64",
[](Triple::ArchType) { return false; }, true);
RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
- getTheAArch64leTarget(), "aarch64", "AArch64 (little endian)");
+ getTheAArch64leTarget(), "aarch64", "AArch64 (little endian)", "AArch64");
RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
- getTheAArch64beTarget(), "aarch64_be", "AArch64 (big endian)");
+ getTheAArch64beTarget(), "aarch64_be", "AArch64 (big endian)", "AArch64");
}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 5d76681cd97b..c1c799b7b349 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -517,7 +517,12 @@ namespace AArch64II {
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x40
+ MO_TLS = 0x40,
+
+ /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+ /// to the symbol is for an import stub. This is used for DLL import
+ /// storage class indication on Windows.
+ MO_DLLIMPORT = 0x80,
};
} // end namespace AArch64II