author     Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:25:46 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:25:46 +0000
commit     7a7e6055035bfd93ab507051819373a6f171258b (patch)
tree       dc9ac22b4fea4f445748feaf7232a146623f0dfa /contrib/llvm/lib/Target/AArch64
parent     b96a714f453e7f5aeeb3c2df2c3e1e8ad749f96f (diff)
parent     71d5a2540a98c81f5bcaeb48805e0e2881f530ef (diff)
Merge llvm trunk r300422 and resolve conflicts.
Notes:
    svn path=/projects/clang500-import/; revision=317029
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64')
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.h  6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.td  116
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp  49
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp  23
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp  182
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h  27
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp  8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp  11
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp  65
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp  102
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  45
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def  419
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp  47
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  338
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h  21
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td  69
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  391
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h  39
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td  49
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp  337
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h  49
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp  133
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h  7
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp  73
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp  272
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.h  29
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp  358
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp  213
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h  77
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td  20
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp  16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td  2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td  4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td  106
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td  523
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td  361
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td  62
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td  3
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td  352
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td (renamed from contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td)  354
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp  14
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp  22
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h  33
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td  134
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp  36
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h  2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp  56
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h  16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp  25
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp  475
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp  281
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h  10
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp  92
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp  23
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp  11
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp  30
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h  78
57 files changed, 4560 insertions, 2136 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index fd106a8d9b0b..b44b13e36e15 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -22,8 +22,11 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+class AArch64Subtarget;
class AArch64TargetMachine;
class FunctionPass;
+class InstructionSelector;
class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
@@ -45,6 +48,9 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &,
+ AArch64Subtarget &, AArch64RegisterBankInfo &);
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
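
The declarations added above are the hooks this target exposes to GlobalISel; in particular, createAArch64InstructionSelector() is the factory the subtarget calls to build its instruction selector. A minimal sketch of how such a factory is typically consumed follows. The helper name and ownership arrangement are illustrative assumptions; the real wiring lives in AArch64Subtarget.cpp, whose hunk is not reproduced in this excerpt.

#include "AArch64.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include <memory>

using namespace llvm;

// Illustrative only: wrap the selector returned by the factory declared in
// AArch64.h in a unique_ptr so the subtarget can own it for the lifetime of
// the GlobalISel pipeline.
static std::unique_ptr<InstructionSelector>
makeAArch64Selector(const AArch64TargetMachine &TM, AArch64Subtarget &STI,
                    AArch64RegisterBankInfo &RBI) {
  return std::unique_ptr<InstructionSelector>(
      createAArch64InstructionSelector(TM, STI, RBI));
}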
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index 91c335fac32d..519ca2894683 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -27,7 +27,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
- "Enable cryptographic instructions">;
+ "Enable cryptographic instructions", [FeatureNEON]>;
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
@@ -38,6 +38,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
+ "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -100,6 +103,14 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureFuseAES : SubtargetFeature<
+ "fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
+
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@@ -108,12 +119,22 @@ def FeatureUseRSqrt : SubtargetFeature<
"use-reciprocal-square-root", "UseRSqrt", "true",
"Use the reciprocal square root approximation">;
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+def FeatureLSLFast : SubtargetFeature<
+ "lsl-fast", "HasLSLFast", "true",
+ "CPU has a fastpath logical shift of up to 3 places">;
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
@@ -123,6 +144,7 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
//===----------------------------------------------------------------------===//
include "AArch64RegisterInfo.td"
+include "AArch64RegisterBanks.td"
include "AArch64CallingConvention.td"
//===----------------------------------------------------------------------===//
@@ -149,7 +171,8 @@ include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedM1.td"
-include "AArch64SchedVulcan.td"
+include "AArch64SchedThunderX.td"
+include "AArch64SchedThunderX2T99.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -180,6 +203,8 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -226,6 +251,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -256,7 +282,8 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
@@ -269,19 +296,66 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureRDM,
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
-def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
- "Broadcom Vulcan processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureNEON,
- FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- HasV8_1aOps]>;
+def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
+ "ThunderX2T99",
+ "Cavium ThunderX2 processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureArithmeticBccFusion,
+ FeatureNEON,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureLSE,
+ HasV8_1aOps]>;
+
+def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+ "ThunderXT88",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+ "ThunderXT81",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+ "ThunderXT83",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
def : ProcessorModel<"generic", NoSchedModel, [
FeatureCRC,
@@ -291,11 +365,11 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeaturePostRAScheduler
]>;
-// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+// FIXME: Cortex-A35 is currently modeled as a Cortex-A53.
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72 and Cortex-A73 are currently modelled as an Cortex-A57.
+// FIXME: Cortex-A72 and Cortex-A73 are currently modeled as a Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
@@ -304,7 +378,13 @@ def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
-def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>;
+// Cavium ThunderX/ThunderX T8X Processors
+def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 0aa597bcdc56..4a7e0b2b803e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -493,43 +493,30 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
MachineBasicBlock &MBB) {
- RegScavenger RS;
- RS.enterBasicBlock(MBB);
- RS.forward(MachineBasicBlock::iterator(G->getStart()));
-
// Can we find an appropriate register that is available throughout the life
- // of the chain?
- unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
- BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
- for (MachineBasicBlock::iterator I = G->begin(), E = G->end(); I != E; ++I) {
- RS.forward(I);
- AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
-
- // Remove any registers clobbered by a regmask or any def register that is
- // immediately dead.
- for (auto J : I->operands()) {
- if (J.isRegMask())
- AvailableRegs.clearBitsNotInMask(J.getRegMask());
-
- if (J.isReg() && J.isDef()) {
- MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true);
- if (J.isDead())
- for (; AI.isValid(); ++AI)
- AvailableRegs.reset(*AI);
-#ifndef NDEBUG
- else
- for (; AI.isValid(); ++AI)
- assert(!AvailableRegs[*AI] &&
- "Non-dead def should have been removed by now!");
-#endif
- }
- }
+ // of the chain? Simulate liveness backwards until the end of the chain.
+ LiveRegUnits Units(*TRI);
+ Units.addLiveOuts(MBB);
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator ChainEnd = G->end();
+ while (I != ChainEnd) {
+ --I;
+ Units.stepBackward(*I);
}
+ // Check which register units are alive throughout the chain.
+ MachineBasicBlock::iterator ChainBegin = G->begin();
+ assert(ChainBegin != ChainEnd && "Chain should contain instructions");
+ do {
+ --I;
+ Units.accumulateBackward(*I);
+ } while (I != ChainBegin);
+
// Make sure we allocate in-order, to get the cheapest registers first.
+ unsigned RegClassID = ChainBegin->getDesc().OpInfo[0].RegClass;
auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
for (auto Reg : Ord) {
- if (!AvailableRegs[Reg])
+ if (!Units.available(Reg))
continue;
if (C == getColor(Reg))
return Reg;
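
The rewritten scavengeRegister() above replaces a forward RegScavenger walk with a backward LiveRegUnits walk, and its comments spell out the idea: roll liveness back from the block's live-outs to the end of the chain, accumulate everything the chain itself touches, then pick the first register whose units stayed free throughout. Below is a minimal self-contained sketch of that pattern, assuming only the LiveRegUnits calls used in the hunk; the function name and the direct iteration over the register class (instead of the RegisterClassInfo allocation order and color check used by the pass) are simplifications.

#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

// Sketch: find a register of class RC that is unused across the instruction
// range [ChainBegin, ChainEnd) inside MBB; returns 0 if none is free.
static unsigned findRegFreeOverChain(const TargetRegisterInfo &TRI,
                                     const TargetRegisterClass &RC,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator ChainBegin,
                                     MachineBasicBlock::iterator ChainEnd) {
  LiveRegUnits Units(TRI);
  Units.addLiveOuts(MBB); // start from the block's live-out set
  // Step precisely backwards from the block end to the end of the chain:
  // defs kill their units, uses make them live again.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != ChainEnd) {
    --I;
    Units.stepBackward(*I);
  }
  // Over the chain itself, accumulate instead of stepping, so any unit that
  // is live anywhere inside the chain stays marked as taken.
  do {
    --I;
    Units.accumulateBackward(*I);
  } while (I != ChainBegin);

  // Return the first register whose units are all free over the whole chain.
  for (unsigned Reg : RC)
    if (Units.available(Reg))
      return Reg;
  return 0;
}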
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 0cbb2db1134a..e1b8ee6d03c3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -31,16 +31,23 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
@@ -59,12 +66,12 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
//===----------------------------------------------------------------------===//
namespace {
-class AArch64AddressTypePromotion : public FunctionPass {
+class AArch64AddressTypePromotion : public FunctionPass {
public:
static char ID;
- AArch64AddressTypePromotion()
- : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
+
+ AArch64AddressTypePromotion() : FunctionPass(ID) {
initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
@@ -76,10 +83,11 @@ public:
private:
/// The current function.
- Function *Func;
+ Function *Func = nullptr;
+
/// Filter out all sexts that does not have this type.
/// Currently initialized with Int64Ty.
- Type *ConsideredSExtType;
+ Type *ConsideredSExtType = nullptr;
// This transformation requires dominator info.
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -129,7 +137,8 @@ private:
void mergeSExts(ValueToInsts &ValToSExtendedUses,
SetOfInstructions &ToRemove);
};
-} // end anonymous namespace.
+
+} // end anonymous namespace
char AArch64AddressTypePromotion::ID = 0;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index a4950af32097..b2f55a7e1e09 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.cpp - Call lowering ---===//
+//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,36 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
-
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -31,12 +52,12 @@ using namespace llvm;
#endif
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
- : CallLowering(&TLI) {
-}
+ : CallLowering(&TLI) {}
struct IncomingArgHandler : public CallLowering::ValueHandler {
- IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -45,6 +66,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
return AddrReg;
}
@@ -67,11 +89,14 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ uint64_t StackUsed;
};
struct FormalArgHandler : public IncomingArgHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : IncomingArgHandler(MIRBuilder, MRI) {}
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
@@ -80,8 +105,8 @@ struct FormalArgHandler : public IncomingArgHandler {
struct CallReturnHandler : public IncomingArgHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -92,8 +117,10 @@ struct CallReturnHandler : public IncomingArgHandler {
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn,
+ CCAssignFn *AssignFnVarArg)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+ AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -126,14 +153,29 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info,
+ CCState &State) override {
+ bool Res;
+ if (Info.IsFixed)
+ Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ else
+ Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+
+ StackSize = State.getNextStackOffset();
+ return Res;
+ }
+
MachineInstrBuilder MIB;
+ CCAssignFn *AssignFnVarArg;
+ uint64_t StackSize;
};
-void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
- SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL,
- MachineRegisterInfo &MRI,
- SplitArgTy PerformArgSplit) const {
+void AArch64CallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ const SplitArgTy &PerformArgSplit) const {
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -145,7 +187,7 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags);
+ OrigArg.Flags, OrigArg.IsFixed);
return;
}
@@ -154,19 +196,12 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
SplitArgs.push_back(
- ArgInfo{MRI.createGenericVirtualRegister(LLT{*SplitTy, DL}), SplitTy,
- OrigArg.Flags});
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
+ SplitTy, OrigArg.Flags, OrigArg.IsFixed});
}
- SmallVector<uint64_t, 4> BitOffsets;
- for (auto Offset : Offsets)
- BitOffsets.push_back(Offset * 8);
-
- SmallVector<unsigned, 8> SplitRegs;
- for (auto I = &SplitArgs[FirstRegIdx]; I != SplitArgs.end(); ++I)
- SplitRegs.push_back(I->Reg);
-
- PerformArgSplit(SplitRegs, BitOffsets);
+ for (unsigned i = 0; i < Offsets.size(); ++i)
+ PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -184,16 +219,16 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
auto &DL = F.getParent()->getDataLayout();
ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeSet::ReturnIndex, DL, F);
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, VReg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
});
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- Success = handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler);
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
+ Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
}
MIRBuilder.insertInstr(MIB);
@@ -203,7 +238,6 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- auto &Args = F.getArgumentList();
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -211,13 +245,27 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
unsigned i = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + 1, DL, F);
+ bool Split = false;
+ LLT Ty = MRI.getType(VRegs[i]);
+ unsigned Dst = VRegs[i];
+
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildSequence(VRegs[i], Regs, Offsets);
+ [&](unsigned Reg, uint64_t Offset) {
+ if (!Split) {
+ Split = true;
+ Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+ }
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
+ Dst = Tmp;
});
+
+ if (Dst != VRegs[i])
+ MIRBuilder.buildCopy(VRegs[i], Dst);
++i;
}
@@ -228,10 +276,25 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
- FormalArgHandler Handler(MIRBuilder, MRI);
- if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
+ FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ if (F.isVarArg()) {
+ if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
+ // FIXME: we need to reimplement saveVarArgsRegisters from
+ // AArch64ISelLowering.
+ return false;
+ }
+
+ // We currently pass all varargs at 8-byte alignment.
+ uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+ }
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -239,6 +302,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
const MachineOperand &Callee,
const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const {
@@ -250,21 +314,25 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
for (auto &OrigArg : OrigArgs) {
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, OrigArg.Reg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
});
}
// Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *CallAssignFn =
- TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+ CCAssignFn *AssignFnFixed =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ CCAssignFn *AssignFnVarArg =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);
+
+ auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
: AArch64::BL);
- MIB.addOperand(Callee);
+ MIB.add(Callee);
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget().getRegisterInfo();
@@ -272,8 +340,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs, Handler))
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
+ AssignFnVarArg);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
@@ -298,20 +367,23 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- std::copy(Offsets.begin(), Offsets.end(),
- std::back_inserter(RegOffsets));
- std::copy(Regs.begin(), Regs.end(),
- std::back_inserter(SplitRegs));
+ [&](unsigned Reg, uint64_t Offset) {
+ RegOffsets.push_back(Offset);
+ SplitRegs.push_back(Reg);
});
- CallReturnHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs, Handler))
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
if (!RegOffsets.empty())
MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
}
+ CallSeqStart.addImm(Handler.StackSize);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Handler.StackSize)
+ .addImm(0);
+
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index ce6676249df6..d96ce95c4de0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.h - Call lowering -----===//
+//===--- AArch64CallLowering.h - Call lowering ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,20 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
-#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include <cstdint>
+#include <functional>
namespace llvm {
class AArch64TargetLowering;
class AArch64CallLowering: public CallLowering {
- public:
+public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
@@ -32,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee,
- const ArgInfo &OrigRet,
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const override;
private:
@@ -44,13 +46,14 @@ private:
typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
MemHandler;
- typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
- SplitArgTy;
+ typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
- SplitArgTy SplitArg) const;
+ const SplitArgTy &SplitArg) const;
};
-} // End of namespace llvm;
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 8b186328d125..2dfcd2d1c393 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -265,10 +265,10 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change immediate in comparison instruction (ADDS or SUBS).
BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
- .addOperand(CmpMI->getOperand(0))
- .addOperand(CmpMI->getOperand(1))
+ .add(CmpMI->getOperand(0))
+ .add(CmpMI->getOperand(1))
.addImm(Imm)
- .addOperand(CmpMI->getOperand(3));
+ .add(CmpMI->getOperand(3));
CmpMI->eraseFromParent();
// The fact that this comparison was picked ensures that it's related to the
@@ -278,7 +278,7 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change condition in branch instruction.
BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(Cmp)
- .addOperand(BrMI.getOperand(1));
+ .add(BrMI.getOperand(1));
BrMI.eraseFromParent();
MBB->updateTerminator();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index da09b36cac9c..00a0111f2bd2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -594,7 +594,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
.addReg(DestReg, RegState::Define | RegState::Dead)
- .addOperand(HeadCond[2])
+ .add(HeadCond[2])
.addImm(0)
.addImm(0);
// SUBS uses the GPR*sp register classes.
@@ -650,13 +650,12 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
if (CmpMI->getOperand(FirstOp + 1).isReg())
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
TII->getRegClass(MCID, 1, TRI, *MF));
- MachineInstrBuilder MIB =
- BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
- .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
+ MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
+ .add(CmpMI->getOperand(FirstOp)); // Register Rn
if (isZBranch)
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
else
- MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
+ MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
// If CmpMI was a terminator, we need a new conditional branch to replace it.
@@ -666,7 +665,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
CmpMI->getOpcode() == AArch64::CBNZX;
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
- .addOperand(CmpMI->getOperand(1)); // Branch target.
+ .add(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
Head->updateTerminator();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index fe1c0beee0eb..d0c0956b87ca 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -70,9 +71,9 @@ static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -112,7 +113,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -179,7 +180,7 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -362,7 +363,7 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -425,7 +426,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
@@ -539,15 +540,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
if (Imm != 0) {
unsigned LZ = countLeadingZeros(Imm);
unsigned TZ = countTrailingZeros(Imm);
- Shift = ((63 - LZ) / 16) * 16;
- LastShift = (TZ / 16) * 16;
+ Shift = (TZ / 16) * 16;
+ LastShift = ((63 - LZ) / 16) * 16;
}
unsigned Imm16 = (Imm >> Shift) & Mask;
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
.addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
@@ -564,15 +565,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineInstrBuilder MIB2;
unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
- while (Shift != LastShift) {
- Shift -= 16;
+ while (Shift < LastShift) {
+ Shift += 16;
Imm16 = (Imm >> Shift) & Mask;
if (Imm16 == (isNeg ? Mask : 0))
continue; // This 16-bit portion is already set correctly.
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addReg(DstReg,
RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
@@ -627,7 +628,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired)
+ .add(Desired)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
@@ -643,9 +644,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
StoreBB->addLiveIn(New.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
- .addOperand(New)
- .addOperand(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -710,7 +709,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .addOperand(DesiredLo)
+ .add(DesiredLo)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
@@ -718,7 +717,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .addOperand(DesiredHi)
+ .add(DesiredHi)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
@@ -738,9 +737,9 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
StoreBB->addLiveIn(NewHi.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .addOperand(NewLo)
- .addOperand(NewHi)
- .addOperand(Addr);
+ .add(NewLo)
+ .add(NewHi)
+ .add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -825,8 +824,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
@@ -842,7 +841,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
@@ -878,19 +877,31 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
- .addOperand(MI.getOperand(1));
+ .add(MI.getOperand(1));
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg)
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(2))
.addImm(0);
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
+ case AArch64::MOVbaseTLS: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ auto SysReg = AArch64SysReg::TPIDR_EL0;
+ MachineFunction *MF = MBB.getParent();
+ if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
+ MF->getTarget().getCodeModel() == CodeModel::Kernel)
+ SysReg = AArch64SysReg::TPIDR_EL1;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+ .addImm(SysReg);
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
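
The expandMOVImm() hunk above flips the direction of the MOVZ/MOVK expansion: the first 16-bit chunk emitted is now the lowest non-zero one, with MOVKs patching the higher chunks, instead of starting at the highest chunk and working down. The standalone worked example below is illustrative only; it ignores the MOVN path and the one/zero chunk-counting heuristics of the real code, and simply prints the instruction sequence the new loop order produces for one immediate.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Imm = 0x0000123400005678ULL; // value to materialize into x0

  unsigned TZ = __builtin_ctzll(Imm);          // 3
  unsigned LZ = __builtin_clzll(Imm);          // 19
  unsigned Shift = (TZ / 16) * 16;             // 0, lowest non-zero chunk
  unsigned LastShift = ((63 - LZ) / 16) * 16;  // 32, highest non-zero chunk

  // First instruction sets the lowest chunk: movz x0, #0x5678, lsl #0
  std::printf("movz x0, #0x%llx, lsl #%u\n",
              (unsigned long long)((Imm >> Shift) & 0xFFFF), Shift);

  // Remaining chunks are patched in with MOVK, skipping chunks that already
  // hold the right value (zero, in the MOVZ case). Here that emits a single
  // movk x0, #0x1234, lsl #32; before this change the same value was built
  // as movz #0x1234, lsl #32 followed by movk #0x5678, lsl #0.
  while (Shift < LastShift) {
    Shift += 16;
    uint64_t Imm16 = (Imm >> Shift) & 0xFFFF;
    if (Imm16 == 0)
      continue;
    std::printf("movk x0, #0x%llx, lsl #%u\n",
                (unsigned long long)Imm16, Shift);
  }
  return 0;
}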
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index fe2c2d4550a7..4e5e3e43a468 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -15,28 +15,62 @@
#include "AArch64.h"
#include "AArch64CallingConvention.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -50,48 +84,55 @@ class AArch64FastISel final : public FastISel {
} BaseKind;
private:
- BaseKind Kind;
- AArch64_AM::ShiftExtendType ExtType;
+ BaseKind Kind = RegBase;
+ AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
union {
unsigned Reg;
int FI;
} Base;
- unsigned OffsetReg;
- unsigned Shift;
- int64_t Offset;
- const GlobalValue *GV;
+ unsigned OffsetReg = 0;
+ unsigned Shift = 0;
+ int64_t Offset = 0;
+ const GlobalValue *GV = nullptr;
public:
- Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
- OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
+
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
+
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
+
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+
void setOffsetReg(unsigned Reg) {
OffsetReg = Reg;
}
+
unsigned getOffsetReg() const {
return OffsetReg;
}
+
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
+
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
+
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
void setShift(unsigned S) { Shift = S; }
@@ -417,7 +458,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
- if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
+ if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
return 0;
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -531,23 +572,23 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr, Ty);
- }
- case Instruction::IntToPtr: {
+
+ case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
- case Instruction::PtrToInt: {
+
+ case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
+
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
@@ -563,7 +604,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -2813,8 +2854,8 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
- assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
- "Unexpected value type.");
+ assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
+ "Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
@@ -3106,8 +3147,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
CodeModel::Model CM = TM.getCodeModel();
- // Only support the small and large code model.
- if (CM != CodeModel::Small && CM != CodeModel::Large)
+ // Only support the small-addressing and large code models.
+ if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
return false;
// FIXME: Add large code model support for ELF.
@@ -3158,7 +3199,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
MachineInstrBuilder MIB;
- if (CM == CodeModel::Small) {
+ if (Subtarget->useSmallAddressing()) {
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
if (Symbol)
@@ -3369,8 +3410,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
MFI.setFrameAddressIsTaken(true);
- const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3521,11 +3561,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
- case Intrinsic::trap: {
+ case Intrinsic::trap:
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
- }
+
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -5092,8 +5132,10 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
}
namespace llvm {
-llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
+
+FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
return new AArch64FastISel(FuncInfo, LibInfo);
}
-}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5b8c35375f8..550174b22a89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -90,21 +90,42 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -245,14 +266,13 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
if (&MF->front() == MBB)
return AArch64::X9;
- const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
- LivePhysRegs LiveRegs(&TRI);
+ const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF);
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -319,7 +339,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
-
unsigned NewOpc;
bool NewIsUnscaled = false;
switch (MBBI->getOpcode()) {
@@ -362,7 +381,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
- MIB.addOperand(MBBI->getOperand(OpndIdx));
+ MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
@@ -863,22 +882,26 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- AttributeSet Attrs = MF.getFunction()->getAttributes();
+ AttributeList Attrs = MF.getFunction()->getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
namespace {
+
struct RegPairInfo {
- RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
- unsigned Reg1;
- unsigned Reg2;
+ unsigned Reg1 = AArch64::NoRegister;
+ unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
bool IsGPR;
+
+ RegPairInfo() = default;
+
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
+
} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index d472a54d9543..8b1c9740d2ad 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -16,281 +16,198 @@
#endif
namespace llvm {
-namespace AArch64 {
-
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::GPR32allRegClassID) | (1u << AArch64::GPR32RegClassID) |
- (1u << AArch64::GPR32spRegClassID) |
- (1u << AArch64::GPR32commonRegClassID) |
- (1u << AArch64::GPR32sponlyRegClassID) |
- (1u << AArch64::GPR64allRegClassID) | (1u << AArch64::GPR64RegClassID) |
- (1u << AArch64::GPR64spRegClassID) |
- (1u << AArch64::GPR64commonRegClassID) |
- (1u << AArch64::tcGPR64RegClassID) |
- (1u << AArch64::GPR64sponlyRegClassID),
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this is
- // tablegenerated. It's only needed because of the hardcoded register class
- // limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t FPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::FPR8RegClassID) | (1u << AArch64::FPR16RegClassID) |
- (1u << AArch64::FPR32RegClassID) | (1u << AArch64::FPR64RegClassID) |
- (1u << AArch64::DDRegClassID) | (1u << AArch64::FPR128RegClassID) |
- (1u << AArch64::FPR128_loRegClassID) | (1u << AArch64::DDDRegClassID) |
- (1u << AArch64::DDDDRegClassID),
- // Classes 32-63
- (1u << (AArch64::QQRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub1_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQQRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub3_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub2_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQ_with_qsub0_in_FPR128_lo_and_QQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)),
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t CCRCoverageData[] = {
- // Classes 0-31
- 1u << AArch64::CCRRegClassID,
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-RegisterBank GPRRegBank(AArch64::GPRRegBankID, "GPR", 64, GPRCoverageData);
-RegisterBank FPRRegBank(AArch64::FPRRegBankID, "FPR", 512, FPRCoverageData);
-RegisterBank CCRRegBank(AArch64::CCRRegBankID, "CCR", 32, CCRCoverageData);
-
-RegisterBank *RegBanks[] = {&GPRRegBank, &FPRRegBank, &CCRRegBank};
-
-// PartialMappings.
-enum PartialMappingIdx {
- PMI_None = -1,
- PMI_GPR32 = 1,
- PMI_GPR64,
- PMI_FPR32,
- PMI_FPR64,
- PMI_FPR128,
- PMI_FPR256,
- PMI_FPR512,
- PMI_FirstGPR = PMI_GPR32,
- PMI_LastGPR = PMI_GPR64,
- PMI_FirstFPR = PMI_FPR32,
- PMI_LastFPR = PMI_FPR512,
- PMI_Min = PMI_FirstGPR,
-};
-
-static unsigned getRegBankBaseIdxOffset(unsigned Size) {
- assert(Size && "0-sized type!!");
- // Make anything smaller than 32 gets 32
- Size = ((Size + 31) / 32) * 32;
- // 32 is 0, 64 is 1, 128 is 2, and so on.
- return Log2_32(Size) - /*Log2_32(32)=*/ 5;
-}
-
-RegisterBankInfo::PartialMapping PartMappings[] {
- /* StartIdx, Length, RegBank */
- // 0: GPR 32-bit value.
- {0, 32, GPRRegBank},
- // 1: GPR 64-bit value.
- {0, 64, GPRRegBank},
- // 2: FPR 32-bit value.
- {0, 32, FPRRegBank},
- // 3: FPR 64-bit value.
- {0, 64, FPRRegBank},
- // 4: FPR 128-bit value.
- {0, 128, FPRRegBank},
- // 5: FPR 256-bit value.
- {0, 256, FPRRegBank},
- // 6: FPR 512-bit value.
- {0, 512, FPRRegBank}
-};
-
-enum ValueMappingIdx {
- First3OpsIdx = 0,
- Last3OpsIdx = 18,
- DistanceBetweenRegBanks = 3,
- FirstCrossRegCpyIdx = 21,
- LastCrossRegCpyIdx = 27,
- DistanceBetweenCrossRegCpy = 2
+RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
+ /* StartIdx, Length, RegBank */
+ // 0: FPR 32-bit value.
+ {0, 32, AArch64::FPRRegBank},
+ // 1: FPR 64-bit value.
+ {0, 64, AArch64::FPRRegBank},
+ // 2: FPR 128-bit value.
+ {0, 128, AArch64::FPRRegBank},
+ // 3: FPR 256-bit value.
+ {0, 256, AArch64::FPRRegBank},
+ // 4: FPR 512-bit value.
+ {0, 512, AArch64::FPRRegBank},
+ // 5: GPR 32-bit value.
+ {0, 32, AArch64::GPRRegBank},
+ // 6: GPR 64-bit value.
+ {0, 64, AArch64::GPRRegBank},
};
// ValueMappings.
-RegisterBankInfo::ValueMapping ValMappings[]{
+RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
/* BreakDown, NumBreakDowns */
+ // 0: invalid
+ {nullptr, 0},
// 3-operands instructions (all binary operations should end up with one of
// those mapping).
- // 0: GPR 32-bit value. <-- This must match First3OpsIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 3: GPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- // 6: FPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 9: FPR 64-bit value.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 12: FPR 128-bit value.
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- // 15: FPR 256-bit value.
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- // 18: FPR 512-bit value. <-- This must match Last3OpsIdx.
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 1: FPR 32-bit value. <-- This must match First3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 4: FPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 7: FPR 128-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ // 10: FPR 256-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ // 13: FPR 512-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 16: GPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 19: GPR 64-bit value. <-- This must match Last3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
// Cross register bank copies.
- // 21: GPR 32-bit value to FPR 32-bit value. <-- This must match
+ // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match
// FirstCrossRegCpyIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 23: GPR 64-bit value to FPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 25: FPR 32-bit value to GPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 27: FPR 64-bit value to GPR 64-bit value. <-- This must match
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 24: FPR 64-bit value to GPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ // 26: FPR 128-bit value to GPR 128-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 28: FPR 256-bit value to GPR 256-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 30: FPR 512-bit value to GPR 512-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 32: GPR 32-bit value to FPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match
// LastCrossRegCpyIdx.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1}
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
};
-/// Get the pointer to the ValueMapping representing the RegisterBank
-/// at \p RBIdx with a size of \p Size.
-///
-/// The returned mapping works for instructions with the same kind of
-/// operands for up to 3 operands.
-///
-/// \pre \p RBIdx != PartialMappingIdx::None
+bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
+ unsigned ValStartIdx,
+ unsigned ValLength,
+ const RegisterBank &RB) {
+ const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min];
+ return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
+ Map.RegBank == &RB;
+}
+
+bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
+ unsigned FirstInBank,
+ unsigned Size,
+ unsigned Offset) {
+ unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
+ const ValueMapping &Map =
+ AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset];
+ return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
+ Map.NumBreakDowns == 1;
+}
+
+bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
+ PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order) {
+ if (Order.front() != FirstAlias)
+ return false;
+ if (Order.back() != LastAlias)
+ return false;
+ if (Order.front() > Order.back())
+ return false;
+
+ PartialMappingIdx Previous = Order.front();
+ bool First = true;
+ for (const auto &Current : Order) {
+ if (First) {
+ First = false;
+ continue;
+ }
+ if (Previous + 1 != Current)
+ return false;
+ Previous = Current;
+ }
+ return true;
+}
+
+unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
+ unsigned Size) {
+ if (RBIdx == PMI_FirstGPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ return -1;
+ }
+ if (RBIdx == PMI_FirstFPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ if (Size <= 128)
+ return 2;
+ if (Size <= 256)
+ return 3;
+ if (Size <= 512)
+ return 4;
+ return -1;
+ }
+ return -1;
+}
+
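
[Editorial note, not part of the patch: a minimal standalone sketch of the size-to-bucket mapping getRegBankBaseIdxOffset implements above. The helper name is invented for illustration; GPR covers 32/64-bit values, FPR covers 32- through 512-bit values, and anything larger maps to the invalid index.]

    #include <cassert>

    // Returns the per-bank index for a value of Size bits, or -1u when the
    // bank cannot hold it: GPR has buckets for 32/64, FPR for 32..512.
    static unsigned bucketForSize(bool IsGPR, unsigned Size) {
      static const unsigned Limits[] = {32, 64, 128, 256, 512};
      unsigned NumBuckets = IsGPR ? 2 : 5;
      for (unsigned I = 0; I < NumBuckets; ++I)
        if (Size <= Limits[I])
          return I;
      return -1u;
    }

    int main() {
      assert(bucketForSize(/*IsGPR=*/true, 64) == 1);
      assert(bucketForSize(/*IsGPR=*/true, 128) == -1u); // no 128-bit GPR bucket
      assert(bucketForSize(/*IsGPR=*/false, 512) == 4);
    }
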
const RegisterBankInfo::ValueMapping *
-getValueMapping(PartialMappingIdx RBIdx, unsigned Size) {
+AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
+ unsigned Size) {
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
- unsigned ValMappingIdx = First3OpsIdx +
- (RBIdx - AArch64::PartialMappingIdx::PMI_Min +
- getRegBankBaseIdxOffset(Size)) *
- ValueMappingIdx::DistanceBetweenRegBanks;
- assert(ValMappingIdx >= AArch64::First3OpsIdx &&
- ValMappingIdx <= AArch64::Last3OpsIdx && "Mapping out of bound");
+ unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
+ if (BaseIdxOffset == -1u)
+ return &ValMappings[InvalidIdx];
+
+ unsigned ValMappingIdx =
+ First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
+ ValueMappingIdx::DistanceBetweenRegBanks;
+ assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
+ "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
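
[Editorial note, not part of the patch: a worked example of the index arithmetic above. The enum values live in AArch64RegisterBankInfo.h and are inferred here from the table comments ("1: FPR 32-bit value ... First3OpsIdx", "19: GPR 64-bit value ... Last3OpsIdx"); requesting the mapping for a 64-bit GPR value should land on entry 19.]

    #include <cassert>

    int main() {
      // Values mirroring the tables above: the first 3-operand mapping sits
      // at index 1, each bank/size bucket owns 3 consecutive ValueMappings,
      // and GPR32 is the 6th partial mapping (FPR32..FPR512 come first).
      const unsigned First3OpsIdx = 1, DistanceBetweenRegBanks = 3;
      const unsigned FirstGPRMinusMin = 5;
      const unsigned BaseIdxOffset = 1; // 64-bit GPR value
      unsigned ValMappingIdx =
          First3OpsIdx +
          (FirstGPRMinusMin + BaseIdxOffset) * DistanceBetweenRegBanks;
      assert(ValMappingIdx == 19); // matches "// 19: GPR 64-bit value"
    }
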
-/// Get the pointer to the ValueMapping of the operands of a copy
-/// instruction from a GPR or FPR register to a GPR or FPR register
-/// with a size of \p Size.
-///
-/// If \p DstIsGPR is true, the destination of the copy is on GPR,
-/// otherwise it is on FPR. Same thing for \p SrcIsGPR.
+AArch64GenRegisterBankInfo::PartialMappingIdx
+ AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None, // CCR
+ PMI_FirstFPR, // FPR
+ PMI_FirstGPR, // GPR
+ };
+
const RegisterBankInfo::ValueMapping *
-getCopyMapping(bool DstIsGPR, bool SrcIsGPR, unsigned Size) {
- PartialMappingIdx DstRBIdx = DstIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
- PartialMappingIdx SrcRBIdx = SrcIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
+AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
+ unsigned SrcBankID, unsigned Size) {
+ assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
if (DstRBIdx == SrcRBIdx)
return getValueMapping(DstRBIdx, Size);
+
assert(Size <= 64 && "GPR cannot handle that size");
unsigned ValMappingIdx =
FirstCrossRegCpyIdx +
- (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(Size)) *
+ (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
ValueMappingIdx::DistanceBetweenCrossRegCpy;
- assert(ValMappingIdx >= AArch64::FirstCrossRegCpyIdx &&
- ValMappingIdx <= AArch64::LastCrossRegCpyIdx &&
- "Mapping out of bound");
+ assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
+ ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
-
-} // End AArch64 namespace.
} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3099383e5b32..ae01ea477bb9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -328,11 +328,52 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
+/// \brief Determine whether it is worth it to fold SHL into the addressing
+/// mode.
+static bool isWorthFoldingSHL(SDValue V) {
+ assert(V.getOpcode() == ISD::SHL && "invalid opcode");
+  // It is worth folding a logical shift of up to three places.
+ auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (!CSD)
+ return false;
+ unsigned ShiftVal = CSD->getZExtValue();
+ if (ShiftVal > 3)
+ return false;
+
+ // Check if this particular node is reused in any non-memory related
+ // operation. If yes, do not try to fold this node into the address
+ // computation, since the computation will be kept.
+ const SDNode *Node = V.getNode();
+ for (SDNode *UI : Node->uses())
+ if (!isa<MemSDNode>(*UI))
+ for (SDNode *UII : UI->uses())
+ if (!isa<MemSDNode>(*UII))
+ return false;
+ return true;
+}
+
/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the value is used at least twice, unless we are optimizing
- // for code size.
- return ForCodeSize || V.hasOneUse();
+ // Trivial if we are optimizing for code size or if there is only
+ // one use of the value.
+ if (ForCodeSize || V.hasOneUse())
+ return true;
+ // If a subtarget has a fastpath LSL we can fold a logical shift into
+ // the addressing mode and save a cycle.
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
+ isWorthFoldingSHL(V))
+ return true;
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
+ const SDValue LHS = V.getOperand(0);
+ const SDValue RHS = V.getOperand(1);
+ if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
+ return true;
+ if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
+ return true;
+ }
+
+ // It hurts otherwise, since the value will be reused.
+ return false;
}
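
[Editorial note, not part of the patch: a tiny standalone sketch of the bound isWorthFoldingSHL enforces above. The helper name is invented for illustration; only shifts of at most three places are candidates for folding into the load/store addressing mode on subtargets with a fast LSL.]

    #include <cassert>

    // Illustrative only: a left shift of 0..3 places fits the AArch64
    // register-offset addressing mode (reg, reg << amount).
    static bool shiftAmountFoldable(unsigned ShiftVal) { return ShiftVal <= 3; }

    int main() {
      assert(shiftAmountFoldable(3));  // e.g. scale by 8 for 64-bit elements
      assert(!shiftAmountFoldable(4)); // too large for the addressing mode
    }
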
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 849058bdfbdb..0d3289ac84c3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -554,8 +555,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::Hybrid);
- // Enable TBZ/TBNZ
- MaskAndBranchFoldingIsLegal = true;
EnableExtLdPromotion = true;
// Set required alignment.
@@ -793,7 +792,7 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
/// KnownZero/KnownOne bitsets.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, APInt &KnownZero, APInt &KnownOne,
- const SelectionDAG &DAG, unsigned Depth) const {
+ const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
@@ -2113,8 +2112,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -2124,8 +2123,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2231,19 +2231,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, true))
- return true;
- return false;
+ return N->getOpcode() == ISD::SIGN_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::ZERO_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, false))
- return true;
- return false;
+ return N->getOpcode() == ISD::ZERO_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -3578,7 +3572,7 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ assert(Subtarget->useSmallAddressing() &&
"ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
@@ -3679,7 +3673,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
- else if (Subtarget->isTargetELF())
+ if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
@@ -4516,7 +4510,12 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x18", AArch64::X18)
+ .Case("w18", AArch64::W18)
.Default(0);
+ if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isX18Reserved())
+ Reg = 0;
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
@@ -6591,21 +6590,20 @@ FailedModImm:
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
- unsigned ElemSize = VT.getScalarSizeInBits();
unsigned i = 0;
- // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
+
+ // Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
- // value is already in an S or D register.
- // Do not do this for UNDEF/LOAD nodes because we have better patterns
- // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
- if (!Op0.isUndef() && Op0.getOpcode() != ISD::LOAD &&
- (ElemSize == 32 || ElemSize == 64)) {
- unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
- MachineSDNode *N =
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, dl, MVT::i32));
- Vec = SDValue(N, 0);
+ // value is already in an S or D register, and we're forced to emit an
+ // INSERT_SUBREG that we can't fold anywhere.
+ //
+ // We also allow types like i8 and i16 which are illegal scalar but legal
+ // vector element types. After type-legalization the inserted value is
+ // extended (i32) and it is safe to cast them to the vector type by ignoring
+ // the upper bits of the lowest lane (e.g. v8i8, v4i16).
+ if (!Op0.isUndef()) {
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
for (; i < NumElts; ++i) {
@@ -7249,6 +7247,33 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
return NumBits == 32 || NumBits == 64;
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool AArch64TargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
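
[Editorial note, not part of the patch: a minimal standalone sketch of the overall-width checks performed by the two helpers above; the element-type and element-count checks are omitted and the function names are invented for illustration.]

    #include <cassert>

    // Illustrative only: a vector of TotalBits is a legal interleaved access
    // type when it is exactly 64 bits or a multiple of 128 bits, and it is
    // lowered as ceil(TotalBits / 128) ldN/stN operations.
    static bool legalInterleavedBits(unsigned TotalBits) {
      return TotalBits == 64 || TotalBits % 128 == 0;
    }
    static unsigned numAccesses(unsigned TotalBits) {
      return (TotalBits + 127) / 128;
    }

    int main() {
      assert(legalInterleavedBits(64) && numAccesses(64) == 1);   // e.g. v8i8
      assert(legalInterleavedBits(512) && numAccesses(512) == 4); // e.g. v16i32
      assert(!legalInterleavedBits(96));                          // e.g. v3i32
    }
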
/// \brief Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -7272,12 +7297,15 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector types are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = VecTy->getVectorElementType();
@@ -7285,6 +7313,25 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {VecTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
@@ -7293,39 +7340,49 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Function *LdNFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SVI = Shuffles[i];
- unsigned Index = Indices[i];
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+ CallInst *LdN = Builder.CreateCall(
+ LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+ // Extract and store the sub-vectors returned by the load intrinsic.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SVI = Shuffles[i];
+ unsigned Index = Indices[i];
- SVI->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(LdN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SVI].push_back(SubVec);
+ }
+ }
+
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
- return ConstantVector::get(Mask);
+ return true;
}
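
[Editorial note, not part of the patch: a worked example of how the split loads above advance their base pointer, assuming a hypothetical factor-2 load whose de-interleaved shuffle results are <8 x i32>.]

    #include <cassert>

    int main() {
      // 256-bit results are split into NumLoads = 2 ld2 operations over
      // <4 x i32> sub-vectors; each ld2 consumes SubElts * Factor elements,
      // so the second load's base is a GEP of +8 i32 elements.
      const unsigned Factor = 2, OrigElts = 8, NumLoads = 2;
      const unsigned SubElts = OrigElts / NumLoads; // 4
      assert(SubElts * Factor == 8);
    }
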
/// \brief Lower an interleaved store into a stN intrinsic.
@@ -7369,12 +7426,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector types are divisible by 128.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -7394,6 +7454,25 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ auto Mask = SVI->getShuffleMask();
+
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
Type *Tys[2] = {SubVecTy, PtrTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
@@ -7402,34 +7481,43 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Function *StNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
- SmallVector<Value *, 5> Ops;
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- // Split the shufflevector operands into sub vectors for the new stN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 5> Ops;
+
+ // Split the shufflevector operands into sub vectors for the new stN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // In the case of all undefs we're defaulting to using elems from 0
+ // Note: StartMask cannot be negative, it's checked in
+ // isReInterleaveMask
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
- Builder.CreateCall(StNFunc, Ops);
+      // If we're generating more than one store, compute the base address of
+ // subsequent stores as an offset from the previous.
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
+ Builder.CreateCall(StNFunc, Ops);
+ }
return true;
}
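
[Editorial note, not part of the patch: a worked example of the shuffle-mask indexing used above when a store is split, for a hypothetical factor-3 store with LaneLen = 4 after splitting. The lambda name is invented for illustration.]

    #include <cassert>

    int main() {
      // Each stN consumes LaneLen * Factor mask entries; the i-th operand of
      // the StoreCount-th stN starts at this mask index (IdxI above).
      const unsigned Factor = 3, LaneLen = 4;
      auto maskIndex = [&](unsigned StoreCount, unsigned i) {
        return StoreCount * LaneLen * Factor + i;
      };
      assert(maskIndex(0, 0) == 0);
      assert(maskIndex(1, 2) == 14); // second st3, third interleaved operand
    }
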
@@ -7690,7 +7778,7 @@ SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -9267,7 +9355,7 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
- /// This function handles the log2-shuffle pattern produced by the
+/// This function handles the log2-shuffle pattern produced by the
/// LoopVectorizer for the across vector reduction. It consists of
/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
/// are reduced, where s is an induction variable from 0 to
@@ -10483,9 +10571,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
- if (!Subtarget->isTargetAndroid())
- return true;
- return TargetLowering::useLoadStackGuardNode();
+ if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
+ return TargetLowering::useLoadStackGuardNode();
+ return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
@@ -10623,36 +10711,56 @@ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
return false;
}
-Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getIRStackGuard(IRB);
-
- // Android provides a fixed TLS slot for the stack cookie. See the definition
- // of TLS_SLOT_STACK_GUARD in
- // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x28;
+static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
}
-Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getSafeStackPointerLocation(IRB);
+Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
+ // Android provides a fixed TLS slot for the stack cookie. See the definition
+ // of TLS_SLOT_STACK_GUARD in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x28);
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x10);
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x48;
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Function *ThreadPointerFunc =
- Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x48);
+
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x8);
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+}
+
+bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink 'and' mask to cmp use block if it is masking a single bit, since
+  // this likely allows the and/cmp/br to be folded into a single tbz instruction. It
+ // may be beneficial to sink in other cases, but we would have to check that
+ // the cmp would not get folded into the br to form a cbz for these to be
+ // beneficial.
+ ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getUniqueInteger().isPowerOf2();
}
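
[Editorial note, not part of the patch: a standalone sketch of the single-bit test behind the hook above; a mask isolates exactly one bit when it is a power of two. The helper name is invented for illustration.]

    #include <cassert>
    #include <cstdint>

    // Illustrative only: power-of-two check equivalent to isPowerOf2 on the
    // mask's unique integer value.
    static bool isSingleBitMask(uint64_t Mask) {
      return Mask != 0 && (Mask & (Mask - 1)) == 0;
    }

    int main() {
      assert(isSingleBitMask(0x40));  // tbz-friendly
      assert(!isSingleBitMask(0x41)); // would need and + cbz instead
    }
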
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
@@ -10702,7 +10810,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
}
}
-bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
@@ -10711,6 +10819,14 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize =
- Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+unsigned
+AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
+ if (Subtarget->isTargetDarwin())
+ return getPointerTy(DL).getSizeInBits();
+
+ return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
+}
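
[Editorial note, not part of the patch: a quick check of the sizes reported above, assuming 64-bit pointers. Darwin's va_list is a single pointer, while the generic AAPCS64 va_list carries three pointers (__stack, __gr_top, __vr_top) and two 32-bit offsets (__gr_offs, __vr_offs).]

    #include <cassert>

    int main() {
      const unsigned PtrBits = 64;
      assert(3 * PtrBits + 2 * 32 == 256); // generic AAPCS64: 32 bytes
      assert(PtrBits == 64);               // Darwin: 8 bytes
    }
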
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 054ccc31674f..2ad6c8b23df8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -251,7 +251,8 @@ public:
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const SelectionDAG &DAG,
+ APInt &KnownOne, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
@@ -402,7 +403,7 @@ public:
return AArch64::X1;
}
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isCheapToSpeculateCttz() const override {
return true;
@@ -412,6 +413,8 @@ public:
return true;
}
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
bool hasAndNotCompare(SDValue) const override {
// 'bics'
return true;
@@ -435,6 +438,20 @@ public:
return true;
}
+ /// Returns the size of the platform's va_list object.
+ unsigned getVaListSizeInBits(const DataLayout &DL) const override;
+
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index cefdf51b50d2..16be4432b160 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -39,6 +39,9 @@ class AArch64Inst<Format f, string cstr> : Instruction {
let Constraints = cstr;
}
+class InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[UseNegativeImmediates]>;
+
// Pseudo instructions (don't have encoding information)
class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
: AArch64Inst<PseudoFrm, cstr> {
@@ -257,6 +260,7 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
}
def Imm1_8Operand : AsmImmRange<1, 8>;
@@ -264,6 +268,20 @@ def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
def Imm1_64Operand : AsmImmRange<1, 64>;
+class BranchTarget<int N> : AsmOperandClass {
+ let Name = "BranchTarget" # N;
+ let DiagnosticType = "InvalidLabel";
+ let PredicateMethod = "isBranchTarget<" # N # ">";
+}
+
+class PCRelLabel<int N> : BranchTarget<N> {
+ let Name = "PCRelLabel" # N;
+}
+
+def BranchTarget14Operand : BranchTarget<14>;
+def BranchTarget26Operand : BranchTarget<26>;
+def PCRelLabel19Operand : PCRelLabel<19>;
+
def MovZSymbolG3AsmOperand : AsmOperandClass {
let Name = "MovZSymbolG3";
let RenderMethod = "addImmOperands";
@@ -500,7 +518,8 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
}
// imm0_255 predicate - True if the immediate is in the range [0,255].
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255Operand : AsmImmRange<0,255>;
+
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 256;
}]> {
@@ -673,6 +692,14 @@ def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
+def gi_addsub_shifted_imm32 :
+ GIComplexOperandMatcher<s32, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm32>;
+
+def gi_addsub_shifted_imm64 :
+ GIComplexOperandMatcher<s64, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm64>;
+
class neg_addsub_shifted_imm<ValueType Ty>
: Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
let PrintMethod = "printAddSubImm";
@@ -1094,10 +1121,6 @@ def inv_ccode : Operand<i32> {
// Conditional branch target. 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def PCRelLabel19Operand : AsmOperandClass {
- let Name = "PCRelLabel19";
- let DiagnosticType = "InvalidLabel";
-}
def am_brcond : Operand<OtherVT> {
let EncoderMethod = "getCondBranchTargetOpValue";
let DecoderMethod = "DecodePCRelLabel19";
@@ -1154,9 +1177,6 @@ multiclass CmpBranch<bit op, string asm, SDNode node> {
//---
// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
// the target offset are implied zero and so are not part of the immediate.
-def BranchTarget14Operand : AsmOperandClass {
- let Name = "BranchTarget14";
-}
def am_tbrcond : Operand<OtherVT> {
let EncoderMethod = "getTestBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1166,11 +1186,12 @@ def am_tbrcond : Operand<OtherVT> {
// AsmOperand classes to emit (or not) special diagnostics
def TBZImm0_31Operand : AsmOperandClass {
let Name = "TBZImm0_31";
- let PredicateMethod = "isImm0_31";
+ let PredicateMethod = "isImmInRange<0,31>";
let RenderMethod = "addImm0_31Operands";
}
def TBZImm32_63Operand : AsmOperandClass {
let Name = "Imm32_63";
+ let PredicateMethod = "isImmInRange<32,63>";
let DiagnosticType = "InvalidImm0_63";
}
@@ -1232,10 +1253,6 @@ multiclass TestBranch<bit op, string asm, SDNode node> {
//---
// Unconditional branch (immediate) instructions.
//---
-def BranchTarget26Operand : AsmOperandClass {
- let Name = "BranchTarget26";
- let DiagnosticType = "InvalidLabel";
-}
def am_b_target : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1784,10 +1801,10 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
}
// add Rd, Rb, -imm -> sub Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1859,10 +1876,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
} // Defs = [NZCV]
// Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1883,9 +1900,9 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
// Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
// Compare shorthands
@@ -2100,10 +2117,10 @@ multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
let Inst{31} = 1;
}
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2122,10 +2139,10 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
}
} // end Defs = [NZCV]
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2454,7 +2471,7 @@ class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
// Load literal address: 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def am_ldrlit : Operand<OtherVT> {
+def am_ldrlit : Operand<iPTR> {
let EncoderMethod = "getLoadLiteralOpValue";
let DecoderMethod = "DecodePCRelLabel19";
let PrintMethod = "printAlignedLabel";
@@ -9060,7 +9077,7 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
//----------------------------------------------------------------------------
-let Predicates = [HasNEON, HasV8_1a] in {
+let Predicates = [HasNEON, HasRDM] in {
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand regtype, string asm,
@@ -9221,7 +9238,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
let Inst{21} = idx{0};
}
}
-} // let Predicates = [HasNeon, HasV8_1a]
+} // let Predicates = [HasNeon, HasRDM]
//----------------------------------------------------------------------------
// Crypto extensions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4c789926e3e4..41fc8eceab5c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
@@ -369,7 +370,7 @@ void AArch64InstrInfo::instantiateCondBranch(
// Folded compare-and-branch
// Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
@@ -762,6 +763,17 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
+bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
+ if (MI.getNumOperands() < 4)
+ return false;
+ unsigned ShOpVal = MI.getOperand(3).getImm();
+ unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
+ if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
+ ShImm < 4)
+ return true;
+ return false;
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1299,16 +1311,16 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Large) {
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1345,14 +1357,6 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
case AArch64::BICSXrs:
case AArch64::BICWrs:
case AArch64::BICXrs:
- case AArch64::CRC32Brr:
- case AArch64::CRC32CBrr:
- case AArch64::CRC32CHrr:
- case AArch64::CRC32CWrr:
- case AArch64::CRC32CXrr:
- case AArch64::CRC32Hrr:
- case AArch64::CRC32Wrr:
- case AArch64::CRC32Xrr:
case AArch64::EONWrs:
case AArch64::EONXrs:
case AArch64::EORWrs:
@@ -1691,16 +1695,59 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
} else
return false;
- // Offset is calculated as the immediate operand multiplied by the scaling factor.
- // Unscaled instructions have scaling factor set to 1.
+ // Get the scaling factor for the instruction and set the width for the
+ // instruction.
unsigned Scale = 0;
- switch (LdSt.getOpcode()) {
+ int64_t Dummy1, Dummy2;
+
+ // If this returns false, then it's an instruction we don't want to handle.
+ if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
+ return false;
+
+ // Compute the offset. Offset is calculated as the immediate operand
+ // multiplied by the scaling factor. Unscaled instructions have scaling factor
+ // set to 1.
+ if (LdSt.getNumExplicitOperands() == 3) {
+ BaseReg = LdSt.getOperand(1).getReg();
+ Offset = LdSt.getOperand(2).getImm() * Scale;
+ } else {
+ assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
+ BaseReg = LdSt.getOperand(2).getReg();
+ Offset = LdSt.getOperand(3).getImm() * Scale;
+ }
+ return true;
+}
+
+MachineOperand&
+AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
+ assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+ return OfsOp;
+}
+
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+ unsigned &Width, int64_t &MinOffset,
+ int64_t &MaxOffset) const {
+ switch (Opcode) {
+ // Not a memory operation or something we want to handle.
default:
+ Scale = Width = 0;
+ MinOffset = MaxOffset = 0;
return false;
+ case AArch64::STRWpost:
+ case AArch64::LDRWpost:
+ Width = 32;
+ Scale = 4;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURXi:
case AArch64::LDURDi:
@@ -1708,6 +1755,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURDi:
Width = 8;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
@@ -1716,6 +1765,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURSi:
Width = 4;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURHi:
case AArch64::LDURHHi:
@@ -1725,6 +1776,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURHHi:
Width = 2;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURBi:
case AArch64::LDURBBi:
@@ -1734,6 +1787,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURBBi:
Width = 1;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDPQi:
case AArch64::LDNPQi:
@@ -1741,10 +1796,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPQi:
Scale = 16;
Width = 32;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = Width = 16;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPXi:
case AArch64::LDPDi:
@@ -1756,12 +1815,16 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPDi:
Scale = 8;
Width = 16;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = Width = 8;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPWi:
case AArch64::LDPSi:
@@ -1773,6 +1836,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPSi:
Scale = 4;
Width = 8;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRWui:
case AArch64::LDRSui:
@@ -1780,29 +1845,27 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STRWui:
case AArch64::STRSui:
Scale = Width = 4;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = Width = 2;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
}
- if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
- Offset = LdSt.getOperand(2).getImm() * Scale;
- } else {
- assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
- Offset = LdSt.getOperand(3).getImm() * Scale;
- }
return true;
}
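
[Editorial note, not part of the patch: a worked example of the Scale/MinOffset/MaxOffset triples filled in above. The immediate operand is in units of Scale, so the reachable byte range is [MinOffset*Scale, MaxOffset*Scale].]

    #include <cassert>

    int main() {
      // LDPXi: Scale = 8 over [-64, 63] -> byte offsets -512..504, step 8.
      assert(-64 * 8 == -512 && 63 * 8 == 504);
      // LDRXui: Scale = 8 over [0, 4095] -> byte offsets 0..32760, step 8.
      assert(4095 * 8 == 32760);
    }
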
@@ -1903,88 +1966,6 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(
- const MachineInstr &First, const MachineInstr &Second) const {
- if (Subtarget.hasArithmeticBccFusion()) {
- // Fuse CMN, CMP, TST followed by Bcc.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::Bcc) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- if (Subtarget.hasArithmeticCbzFusion()) {
- // Fuse ALU operations followed by CBZ/CBNZ.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- return false;
-}
-
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
const MDNode *Expr, const DebugLoc &DL) const {
@@ -3793,7 +3774,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
.addReg(ZeroReg)
- .addOperand(Root.getOperand(2));
+ .add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -4286,3 +4267,199 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_TLS, "aarch64-tls"}};
return makeArrayRef(TargetFlags);
}
+
+unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const {
+ unsigned NotOutlinedSize = SequenceSize * Occurrences;
+ unsigned OutlinedSize;
+
+ // Is this candidate something we can outline as a tail call?
+ if (CanBeTailCall) {
+ // If yes, then we just outline the sequence and replace each of its
+ // occurrences with a branch instruction.
+ OutlinedSize = SequenceSize + Occurrences;
+ } else {
+ // If no, then we outline the sequence (SequenceSize), add a return (+1),
+ // and replace each occurrence with a save/restore of LR and a call
+ // (3 * Occurrences).
+ OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+ }
+
+ // Return the number of instructions saved by outlining this sequence.
+ return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
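
A minimal standalone sketch of the cost model above, with invented instruction counts (the numbers are illustrative only and not taken from the patch):

#include <cstdio>

static unsigned benefit(unsigned SequenceSize, unsigned Occurrences,
                        bool CanBeTailCall) {
  unsigned NotOutlined = SequenceSize * Occurrences;
  unsigned Outlined =
      CanBeTailCall ? SequenceSize + Occurrences            // one branch per use
                    : (SequenceSize + 1) + 3 * Occurrences; // ret + save/call/restore
  return NotOutlined > Outlined ? NotOutlined - Outlined : 0;
}

int main() {
  // A 10-instruction sequence repeated 4 times:
  printf("%u\n", benefit(10, 4, /*TailCall=*/true));  // 40 - 14 = 26
  printf("%u\n", benefit(10, 4, /*TailCall=*/false)); // 40 - 23 = 17
  printf("%u\n", benefit(2, 2, /*TailCall=*/false));  // 4 < 9  -> 0
}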
+
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+ return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+AArch64GenInstrInfo::MachineOutlinerInstrType
+AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+
+ // Don't outline LOHs.
+ if (FuncInfo->getLOHRelated().count(&MI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugValue() || MI.isIndirectDebugValue())
+ return MachineOutlinerInstrType::Invisible;
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+
+ // Is this the end of a function?
+ if (MI.getParent()->succ_empty())
+ return MachineOutlinerInstrType::Legal;
+
+ // It's not, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // Don't outline position-setting instructions (labels, CFI directives).
+ if (MI.isPosition())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Make sure none of the operands are un-outlinable.
+ for (const MachineOperand &MOP : MI.operands())
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't outline anything that uses the link register.
+ if (MI.modifiesRegister(AArch64::LR, &RI) ||
+ MI.readsRegister(AArch64::LR, &RI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Does this use the stack?
+ if (MI.modifiesRegister(AArch64::SP, &RI) ||
+ MI.readsRegister(AArch64::SP, &RI)) {
+
+ // Is it a memory operation?
+ if (MI.mayLoadOrStore()) {
+ unsigned Base; // Filled with the base register of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ unsigned DummyWidth;
+
+ // Does it allow us to offset the base register and is the base SP?
+ if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
+ Base != AArch64::SP)
+ return MachineOutlinerInstrType::Illegal;
+
+ // Find the minimum/maximum offset for this instruction and check if
+ // fixing it up would be in range.
+ int64_t MinOffset, MaxOffset;
+ unsigned DummyScale;
+ getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
+ MaxOffset);
+
+ // TODO: We should really test what happens if an instruction overflows.
+ // This is tricky to test with IR tests, but when the outliner is moved
+ // to a MIR test, it really ought to be checked.
+ if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
+ return MachineOutlinerInstrType::Illegal;
+
+ // It's in range, so we can outline it.
+ return MachineOutlinerInstrType::Legal;
+ }
+
+ // We can't fix it up, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ return MachineOutlinerInstrType::Legal;
+}
+
+void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+ for (MachineInstr &MI : MBB) {
+ unsigned Base, Width;
+ int64_t Offset;
+
+ // Is this a load or store with an immediate offset with SP as the base?
+ if (!MI.mayLoadOrStore() ||
+ !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
+ Base != AArch64::SP)
+ continue;
+
+ // It is, so we have to fix it up.
+ unsigned Scale;
+ int64_t Dummy1, Dummy2;
+
+ MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
+ assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
+ getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
+ assert(Scale != 0 && "Unexpected opcode!");
+
+ // We've pushed the return address to the stack, so add 16 to the offset.
+ // This is safe, since we already checked if it would overflow when we
+ // checked if this instruction was legal to outline.
+ int64_t NewImm = (Offset + 16)/Scale;
+ StackOffsetOperand.setImm(NewImm);
+ }
+}
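
A small sketch of the offset fixup arithmetic above, using assumed numbers: an SP-relative STRXui (Scale = 8) whose encoded immediate was 2 addresses byte offset 16; once the outliner has pushed LR (16 bytes), the same slot sits at byte offset 32, i.e. encoded immediate 4.

#include <cassert>
#include <cstdint>

static int64_t fixupImm(int64_t OldImm, unsigned Scale) {
  int64_t ByteOffset = OldImm * Scale; // offset before outlining
  return (ByteOffset + 16) / Scale;    // LR save shifts SP accesses by 16
}

int main() {
  assert(fixupImm(2, 8) == 4);   // STRXui-style scaled access (example)
  assert(fixupImm(24, 1) == 40); // STURXi-style unscaled access (example)
}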
+
+void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+
+ // If this is a tail call outlined function, then there's already a return.
+ if (IsTailCall)
+ return;
+
+ // It's not a tail call, so we have to insert the return ourselves.
+ MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
+ .addReg(AArch64::LR, RegState::Undef);
+ MBB.insert(MBB.end(), ret);
+
+ // Walk over the basic block and fix up all the stack accesses.
+ fixupPostOutline(MBB);
+}
+
+void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {}
+
+MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, bool IsTailCall) const {
+
+ // Are we tail calling?
+ if (IsTailCall) {
+ // If yes, then we can just branch to the label.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::B))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ return It;
+ }
+
+ // We're not tail calling, so we have to save LR before the call and restore
+ // it after.
+ MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ It = MBB.insert(It, STRXpre);
+ It++;
+
+ // Insert the call.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::BL))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+
+ It++;
+
+ // Restore the link register.
+ MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ It = MBB.insert(It, LDRXpost);
+
+ return It;
+}
+
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 5037866925d3..bacce441f6c5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -133,12 +133,19 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ /// Return the operand holding the immediate offset of the load/store \p LdSt.
+ MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
+
+ /// \brief Returns true if opcode \p Opcode is a memory operation. If it is, set
+ /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
+ ///
+ /// For unscaled instructions, \p Scale is set to 1.
+ bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+ int64_t &MinOffset, int64_t &MaxOffset) const;
+
bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const override;
-
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *Var,
const MDNode *Expr,
@@ -245,7 +252,33 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+ unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
+ bool CanBeTailCall) const override;
+ AArch64GenInstrInfo::MachineOutlinerInstrType
+ getOutliningType(MachineInstr &MI) const override;
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ void insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool isTailCall) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ /// Returns true if the instruction has a shift by immediate that can be
+ /// executed in one cycle less.
+ bool isFalkorLSLFast(const MachineInstr &MI) const;
private:
+
+ /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+ /// so that they will be valid post-outlining.
+ ///
+ /// \param MBB A \p MachineBasicBlock in an outlined function.
+ void fixupPostOutline(MachineBasicBlock &MBB) const;
+
void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2244baacca17..4449412532f3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -30,6 +30,8 @@ def HasLSE : Predicate<"Subtarget->hasLSE()">,
AssemblerPredicate<"FeatureLSE", "lse">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<"FeatureRAS", "ras">;
+def HasRDM : Predicate<"Subtarget->hasRDM()">,
+ AssemblerPredicate<"FeatureRDM", "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
@@ -41,6 +43,11 @@ def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+def UseNegativeImmediates
+ : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
+
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
@@ -424,8 +431,10 @@ def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
+let hasSideEffects = 0 in
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
@@ -574,31 +583,31 @@ def : Pat<(f64 fpimm:$in),
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
tglobaladdr:$g1, tglobaladdr:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
- tglobaladdr:$g2, 32),
- tglobaladdr:$g1, 16),
- tglobaladdr:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
+ tglobaladdr:$g1, 16),
+ tglobaladdr:$g2, 32),
+ tglobaladdr:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
tblockaddress:$g1, tblockaddress:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
- tblockaddress:$g2, 32),
- tblockaddress:$g1, 16),
- tblockaddress:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
+ tblockaddress:$g1, 16),
+ tblockaddress:$g2, 32),
+ tblockaddress:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
tconstpool:$g1, tconstpool:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
- tconstpool:$g2, 32),
- tconstpool:$g1, 16),
- tconstpool:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
+ tconstpool:$g1, 16),
+ tconstpool:$g2, 32),
+ tconstpool:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
tjumptable:$g1, tjumptable:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
- tjumptable:$g2, 32),
- tjumptable:$g1, 16),
- tjumptable:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
+ tjumptable:$g1, 16),
+ tjumptable:$g2, 32),
+ tjumptable:$g3, 48)>;
//===----------------------------------------------------------------------===//
@@ -3284,7 +3293,7 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
def : Pat<(i32 (int_aarch64_neon_sqadd
@@ -5029,7 +5038,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
-
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b51473524c72..878dac6bff1e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -12,17 +12,19 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#include "AArch64InstructionSelector.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
@@ -36,13 +38,61 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
+namespace {
+
+class AArch64InstructionSelector : public InstructionSelector {
+public:
+ AArch64InstructionSelector(const AArch64TargetMachine &TM,
+ const AArch64Subtarget &STI,
+ const AArch64RegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+ bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+ bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const;
+
+ const AArch64TargetMachine &TM;
+ const AArch64Subtarget &STI;
+ const AArch64InstrInfo &TII;
+ const AArch64RegisterInfo &TRI;
+ const AArch64RegisterBankInfo &RBI;
+
+// We declare the temporaries used by selectImpl() in the class to minimize the
+// cost of constructing placeholder values.
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+ : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI)
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
@@ -119,67 +169,34 @@ static bool unsupportedBinOp(const MachineInstr &I,
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
-/// (such as G_OR or G_ADD), appropriate for the register bank \p RegBankID
+/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
- if (OpSize <= 32) {
- assert((OpSize == 32 || (GenericOpc != TargetOpcode::G_SDIV &&
- GenericOpc != TargetOpcode::G_UDIV &&
- GenericOpc != TargetOpcode::G_LSHR &&
- GenericOpc != TargetOpcode::G_ASHR)) &&
- "operation should have been legalized before now");
-
+ if (OpSize == 32) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRWrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORWrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDWrr;
- case TargetOpcode::G_ADD:
- assert(OpSize != 32 && "s32 G_ADD should have been selected");
- return AArch64::ADDWrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBWrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVWr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRXrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORXrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDXrr;
case TargetOpcode::G_GEP:
return AArch64::ADDXrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVXr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVXr;
default:
return GenericOpc;
}
@@ -473,6 +490,82 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
}
}
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+
+ const unsigned CondReg = I.getOperand(0).getReg();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
+ return false;
+
+ unsigned LHS = CCMI->getOperand(2).getReg();
+ unsigned RHS = CCMI->getOperand(3).getReg();
+ if (!getConstantVRegVal(RHS, MRI))
+ std::swap(RHS, LHS);
+
+ const auto RHSImm = getConstantVRegVal(RHS, MRI);
+ if (!RHSImm || *RHSImm != 0)
+ return false;
+
+ const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
+ return false;
+
+ const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
+ unsigned CBOpc = 0;
+ if (CmpWidth <= 32)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
+ else if (CmpWidth == 64)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
+ else
+ return false;
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
+ .addUse(LHS)
+ .addMBB(DestMBB);
+
+ constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectVaStartAAPCS(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ return false;
+}
+
+bool AArch64InstructionSelector::selectVaStartDarwin(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned ListReg = I.getOperand(0).getReg();
+
+ unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
+ .addDef(ArgsAddrReg)
+ .addFrameIndex(FuncInfo->getVarArgsStackIndex())
+ .addImm(0)
+ .addImm(0);
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
+ .addUse(ArgsAddrReg)
+ .addUse(ListReg)
+ .addImm(0)
+ .addMemOperand(*I.memoperands_begin());
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -549,6 +642,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ if (selectCompareBranch(I, MF, MRI))
+ return true;
+
auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
.addUse(CondReg)
.addImm(/*bit offset=*/0)
@@ -558,6 +654,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
}
+ case TargetOpcode::G_BRINDIRECT: {
+ I.setDesc(TII.get(AArch64::BR));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
@@ -629,9 +730,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
// FIXME: Is going through int64_t always correct?
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- } else {
+ } else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
+ } else if (I.getOperand(1).isImm()) {
+ uint64_t Val = I.getOperand(1).getImm();
+ I.getOperand(1).ChangeToImmediate(Val);
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -686,10 +790,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
-#ifndef NDEBUG
- // Sanity-check the pointer register.
+ auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
const unsigned PtrReg = I.getOperand(1).getReg();
+#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
+ // Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
@@ -706,11 +816,46 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
+ uint64_t Offset = 0;
+ auto *PtrMI = MRI.getVRegDef(PtrReg);
+
+ // Try to fold a GEP into our unsigned immediate addressing mode.
+ if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
+ if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
+ int64_t Imm = *COff;
+ const unsigned Size = MemTy.getSizeInBits() / 8;
+ const unsigned Scale = Log2_32(Size);
+ if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
+ unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
+ I.getOperand(1).setReg(Ptr2Reg);
+ PtrMI = MRI.getVRegDef(Ptr2Reg);
+ Offset = Imm / Size;
+ }
+ }
+ }
+
+ // If we haven't folded anything into our addressing mode yet, try to fold
+ // a frame index into the base+offset.
+ if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+
+ I.addOperand(MachineOperand::CreateImm(Offset));
+
+ // If we're storing a 0, use WZR/XZR.
+ if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
+ if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
+ if (I.getOpcode() == AArch64::STRWui)
+ I.getOperand(0).setReg(AArch64::WZR);
+ else if (I.getOpcode() == AArch64::STRXui)
+ I.getOperand(0).setReg(AArch64::XZR);
+ }
+ }
+
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_MUL: {
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -719,48 +864,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
- DEBUG(dbgs() << "G_MUL on bank: " << RB << ", expected: GPR\n");
+ DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
- unsigned ZeroReg;
- unsigned NewOpc;
- if (Ty.isScalar() && Ty.getSizeInBits() <= 32) {
- NewOpc = AArch64::MADDWrrr;
- ZeroReg = AArch64::WZR;
- } else if (Ty == LLT::scalar(64)) {
- NewOpc = AArch64::MADDXrrr;
- ZeroReg = AArch64::XZR;
- } else {
- DEBUG(dbgs() << "G_MUL has type: " << Ty << ", expected: "
- << LLT::scalar(32) << " or " << LLT::scalar(64) << '\n');
+ if (Ty != LLT::scalar(64)) {
+ DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
+ << ", expected: " << LLT::scalar(64) << '\n');
return false;
}
+ unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
+ : AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateReg(ZeroReg, /*isDef=*/false));
-
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
-
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR:
- case TargetOpcode::G_AND:
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV:
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_SUB:
case TargetOpcode::G_GEP: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -783,6 +913,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_PTR_MASK: {
+ uint64_t Align = I.getOperand(2).getImm();
+ if (Align >= 64 || Align == 0)
+ return false;
+
+ uint64_t Mask = ~((1ULL << Align) - 1);
+ I.setDesc(TII.get(AArch64::ANDXri));
+ I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
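
As a quick illustration of the G_PTR_MASK selection above: the immediate is the number of low bits to clear, so a value of 4 aligns a pointer down to a 16-byte boundary (a standalone sketch with example values, not LLVM code):

#include <cassert>
#include <cstdint>

static uint64_t ptrMask(uint64_t LowBitsToClear) {
  return ~((1ULL << LowBitsToClear) - 1);
}

int main() {
  assert(ptrMask(4) == 0xFFFFFFFFFFFFFFF0ULL);
  assert((0x1237ULL & ptrMask(4)) == 0x1230ULL); // align 0x1237 down to 16
}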
@@ -1026,7 +1167,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (Ty == LLT::scalar(32)) {
CSelOpc = AArch64::CSELWr;
- } else if (Ty == LLT::scalar(64)) {
+ } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
CSelOpc = AArch64::CSELXr;
} else {
return false;
@@ -1134,7 +1275,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def1Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC1);
+ .addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
@@ -1143,7 +1284,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def2Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC2);
+ .addImm(getInvertedCondCode(CC2));
MachineInstr &OrMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
.addDef(DefReg)
@@ -1159,7 +1300,69 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_VASTART:
+ return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
+ : selectVaStartAAPCS(I, MF, MRI);
}
return false;
}
+
+/// selectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// \p Result1 set to the 12-bit value and \p Result2 set to the shifter operand.
+bool AArch64InstructionSelector::selectArithImmed(
+ MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const {
+ MachineInstr &MI = *Root.getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+ // which lists [imm] as the list of opcodes it's interested in. However,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ uint64_t Immed;
+ if (Root.isImm())
+ Immed = Root.getImm();
+ else if (Root.isCImm())
+ Immed = Root.getCImm()->getZExtValue();
+ else if (Root.isReg()) {
+ MachineInstr *Def = MRI.getVRegDef(Root.getReg());
+ if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+ MachineOperand &Op1 = Def->getOperand(1);
+ if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
+ return false;
+ Immed = Op1.getCImm()->getZExtValue();
+ } else
+ return false;
+
+ unsigned ShiftAmt;
+
+ if (Immed >> 12 == 0) {
+ ShiftAmt = 0;
+ } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+ ShiftAmt = 12;
+ Immed = Immed >> 12;
+ } else
+ return false;
+
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+ Result1.ChangeToImmediate(Immed);
+ Result1.clearParent();
+ Result2.ChangeToImmediate(ShVal);
+ Result2.clearParent();
+ return true;
+}
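
A standalone sketch of the "12-bit immediate, optionally shifted left by 12" test used above; the immediates are example values only:

#include <cassert>
#include <cstdint>

// Returns true if Immed fits, and reports the 12-bit payload and shift.
static bool arithImmed(uint64_t Immed, uint64_t &Val, unsigned &Shift) {
  if (Immed >> 12 == 0) {
    Val = Immed;
    Shift = 0;
    return true;
  }
  if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    Val = Immed >> 12;
    Shift = 12;
    return true;
  }
  return false; // would need a materialized constant and the register form
}

int main() {
  uint64_t Val; unsigned Shift;
  assert(arithImmed(0xabc, Val, Shift) && Shift == 0);     // plain 12-bit
  assert(arithImmed(0xabc000, Val, Shift) && Shift == 12); // shifted form
  assert(!arithImmed(0xabc001, Val, Shift));               // not encodable
}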
+
+namespace llvm {
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &TM,
+ AArch64Subtarget &Subtarget,
+ AArch64RegisterBankInfo &RBI) {
+ return new AArch64InstructionSelector(TM, Subtarget, RBI);
+}
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
deleted file mode 100644
index 2c6e5a912fb7..000000000000
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- AArch64InstructionSelector --------------------------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares the targeting of the InstructionSelector class for
-/// AArch64.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-
-namespace llvm {
-
-class AArch64InstrInfo;
-class AArch64RegisterBankInfo;
-class AArch64RegisterInfo;
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
-class AArch64InstructionSelector : public InstructionSelector {
-public:
- AArch64InstructionSelector(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI,
- const AArch64RegisterBankInfo &RBI);
-
- bool select(MachineInstr &I) const override;
-
-private:
- /// tblgen-erated 'select' implementation, used as the initial selector for
- /// the patterns that don't require complex C++.
- bool selectImpl(MachineInstr &I) const;
-
- const AArch64TargetMachine &TM;
- const AArch64Subtarget &STI;
- const AArch64InstrInfo &TII;
- const AArch64RegisterInfo &TRI;
- const AArch64RegisterBankInfo &RBI;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 83f276a8161b..6e6daf812295 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,7 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -36,11 +39,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
- for (auto BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
- for (auto Ty : {s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
+ for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({BinOp, Ty}, WidenScalar);
}
setAction({G_GEP, p0}, Legal);
@@ -49,7 +55,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s1, s8, s16, s32})
setAction({G_GEP, 1, Ty}, WidenScalar);
- for (auto BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
+ setAction({G_PTR_MASK, p0}, Legal);
+
+ for (unsigned BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -57,25 +65,41 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({BinOp, Ty}, WidenScalar);
}
- for (auto BinOp : { G_SREM, G_UREM })
+ for (unsigned BinOp : {G_SREM, G_UREM})
for (auto Ty : { s1, s8, s16, s32, s64 })
setAction({BinOp, Ty}, Lower);
- for (auto Op : { G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULO, G_UMULO }) {
+ for (unsigned Op : {G_SMULO, G_UMULO})
+ setAction({Op, s64}, Lower);
+
+ for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
for (auto Ty : { s32, s64 })
setAction({Op, Ty}, Legal);
setAction({Op, 1, s1}, Legal);
}
- for (auto BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
- setAction({G_FREM, s32}, Libcall);
- setAction({G_FREM, s64}, Libcall);
+ for (unsigned BinOp : {G_FREM, G_FPOW}) {
+ setAction({BinOp, s32}, Libcall);
+ setAction({BinOp, s64}, Libcall);
+ }
- for (auto MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s32, s64, p0}) {
+ setAction({G_INSERT, Ty}, Legal);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {s1, s8, s16}) {
+ setAction({G_INSERT, Ty}, WidenScalar);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ // FIXME: Can't widen the sources because that violates the constraints on
+ // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
+ }
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
@@ -141,12 +165,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_TRUNC, 1, Ty}, Legal);
// Conversions
- for (auto Ty : { s1, s8, s16, s32, s64 }) {
+ for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 0, Ty}, Legal);
setAction({G_FPTOUI, 0, Ty}, Legal);
setAction({G_SITOFP, 1, Ty}, Legal);
setAction({G_UITOFP, 1, Ty}, Legal);
}
+ for (auto Ty : { s1, s8, s16 }) {
+ setAction({G_FPTOSI, 0, Ty}, WidenScalar);
+ setAction({G_FPTOUI, 0, Ty}, WidenScalar);
+ setAction({G_SITOFP, 1, Ty}, WidenScalar);
+ setAction({G_UITOFP, 1, Ty}, WidenScalar);
+ }
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 1, Ty}, Legal);
@@ -158,9 +188,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// Control-flow
for (auto Ty : {s1, s8, s16, s32})
setAction({G_BRCOND, Ty}, Legal);
+ setAction({G_BRINDIRECT, p0}, Legal);
// Select
- for (auto Ty : {s1, s8, s16, s32, s64})
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_SELECT, Ty}, WidenScalar);
+
+ for (auto Ty : {s32, s64, p0})
setAction({G_SELECT, Ty}, Legal);
setAction({G_SELECT, 1, s1}, Legal);
@@ -200,5 +234,82 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_BITCAST, 1, LLT::vector(32/EltSize, EltSize)}, Legal);
}
+ setAction({G_VASTART, p0}, Legal);
+
+ // va_list must be a pointer, but most sized types are pretty easy to handle
+ // as the destination.
+ setAction({G_VAARG, 1, p0}, Legal);
+
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({G_VAARG, Ty}, Custom);
+
computeTables();
}
+
+bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (MI.getOpcode()) {
+ default:
+ // No idea what to do.
+ return false;
+ case TargetOpcode::G_VAARG:
+ return legalizeVaArg(MI, MRI, MIRBuilder);
+ }
+
+ llvm_unreachable("expected switch to return");
+}
+
+bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MIRBuilder.setInstr(MI);
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned Align = MI.getOperand(2).getImm();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned ListPtr = MI.getOperand(1).getReg();
+
+ LLT PtrTy = MRI.getType(ListPtr);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
+ unsigned List = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildLoad(
+ List, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ PtrSize, /* Align = */ PtrSize));
+
+ unsigned DstPtr;
+ if (Align > PtrSize) {
+ // Realign the list to the actual required alignment.
+ unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(AlignMinus1, Align - 1);
+
+ unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(ListTmp, List, AlignMinus1);
+
+ DstPtr = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
+ } else
+ DstPtr = List;
+
+ uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
+ MIRBuilder.buildLoad(
+ Dst, DstPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ ValSize, std::max(Align, PtrSize)));
+
+ unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
+
+ unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
+
+ MIRBuilder.buildStore(
+ NewList, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ PtrSize, /* Align = */ PtrSize));
+
+ MI.eraseFromParent();
+ return true;
+}
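
A rough sketch of the va_list pointer arithmetic in legalizeVaArg above, assuming 8-byte pointers and example addresses: the element address is the (possibly realigned) current list pointer, and the stored-back pointer advances by the value size rounded up to the pointer size.

#include <cassert>
#include <cstdint>

static uint64_t alignDown(uint64_t P, uint64_t Align) {
  return P & ~(Align - 1);
}

static uint64_t alignUp(uint64_t N, uint64_t Align) {
  return (N + Align - 1) & ~(Align - 1);
}

int main() {
  const uint64_t PtrSize = 8;
  uint64_t List = 0x1004; // current va_list value (example)

  // A 16-byte aligned element: realign the list, then bump past the element.
  uint64_t Elt = alignDown(List + 16 - 1, 16);
  uint64_t Next = Elt + alignUp(16, PtrSize);
  assert(Elt == 0x1010 && Next == 0x1020);

  // A plain 4-byte int: no realignment, advance by a full pointer slot.
  Elt = List;
  Next = Elt + alignUp(4, PtrSize);
  assert(Next == 0x100c);
}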
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
index feacbef9f147..42d4ac130c5c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -25,6 +25,13 @@ class LLVMContext;
class AArch64LegalizerInfo : public LegalizerInfo {
public:
AArch64LegalizerInfo();
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8e312dcf276f..976498aa70d6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,19 +16,29 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
@@ -58,15 +68,15 @@ typedef struct LdStPairFlags {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
- bool MergeForward;
+ bool MergeForward = false;
// SExtIdx gives the index of the result of the load pair that must be
// extended. The value of SExtIdx assumes that the paired load produces the
// value in this order: (I, returned iterator), i.e., -1 means no value has
// to be extended, 0 means I, and 1 means the returned iterator.
- int SExtIdx;
+ int SExtIdx = -1;
- LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+ LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
bool getMergeForward() const { return MergeForward; }
@@ -78,10 +88,12 @@ typedef struct LdStPairFlags {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
+
AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
}
+ AliasAnalysis *AA;
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const AArch64Subtarget *Subtarget;
@@ -89,6 +101,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which registers have been modified and used.
BitVector ModifiedRegs, UsedRegs;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
@@ -162,8 +179,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
+
char AArch64LoadStoreOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
@@ -246,7 +265,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
default:
if (IsValidLdStrOpc)
*IsValidLdStrOpc = false;
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRQui:
@@ -595,7 +614,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
- .addOperand(BaseRegOp)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
@@ -709,9 +728,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
}
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .addOperand(RegOp0)
- .addOperand(RegOp1)
- .addOperand(BaseRegOp)
+ .add(RegOp0)
+ .add(RegOp1)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -923,7 +942,7 @@ static int alignTo(int Num, int PowOf2) {
}
static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
// One of the instructions must modify memory.
if (!MIa.mayStore() && !MIb.mayStore())
return false;
@@ -932,14 +951,14 @@ static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
return false;
- return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
+ return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
}
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
- if (mayAlias(MIa, *MIb, TII))
+ if (mayAlias(MIa, *MIb, AA))
return true;
return false;
@@ -997,7 +1016,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
+ if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
return false;
} while (MBBI != B && Count < Limit);
return false;
@@ -1167,7 +1186,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// first.
if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
!(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
- !mayAlias(MI, MemInsns, TII)) {
+ !mayAlias(MI, MemInsns, AA)) {
Flags.setMergeForward(false);
return MBBI;
}
@@ -1178,7 +1197,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// into the second.
if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
!(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
- !mayAlias(FirstMI, MemInsns, TII)) {
+ !mayAlias(FirstMI, MemInsns, AA)) {
Flags.setMergeForward(true);
return MBBI;
}
@@ -1233,19 +1252,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
if (!isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I))
+ .add(getLdStBaseOp(*I))
.addImm(Value)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
} else {
// Paired instruction.
int Scale = getMemScale(*I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I, 0))
- .addOperand(getLdStRegOp(*I, 1))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I, 0))
+ .add(getLdStRegOp(*I, 1))
+ .add(getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
}
@@ -1545,7 +1564,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
case AArch64::LDURBBi:
case AArch64::LDURHHi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
if (tryToPromoteLoadFromStore(MBBI)) {
Modified = true;
break;
@@ -1553,7 +1572,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
}
- }
}
// 2) Merge adjacent zero stores into a wider store.
// e.g.,
@@ -1722,6 +1740,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
TRI = Subtarget->getRegisterInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
// Resize the modified and used register bitfield trackers. We do this once
// per function and then clear the bitfield each time we optimize a load or
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
new file mode 100644
index 000000000000..a6926a6700e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -0,0 +1,272 @@
+//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file This file contains the AArch64 implementation of the DAG scheduling mutation
+// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MacroFusion.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given an anchor instr, when the other instr is unspecified, then
+/// check if the anchor instr may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr *SecondMI) {
+ assert((FirstMI || SecondMI) && "At least one instr must be specified");
+
+ const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
+ const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode =
+ SecondMI ? SecondMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+
+ if (ST.hasArithmeticBccFusion())
+ // Fuse CMN, CMP, TST followed by Bcc.
+ if (SecondOpcode == AArch64::Bcc)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0, making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasArithmeticCbzFusion())
+ // Fuse ALU operations followed by CBZ/CBNZ.
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
+ SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0, making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch(FirstOpcode) {
+ // AES encode.
+ case AArch64::AESErr:
+ return SecondOpcode == AArch64::AESMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // AES decode.
+ case AArch64::AESDrr:
+ return SecondOpcode == AArch64::AESIMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ }
+
+ if (ST.hasFuseLiterals())
+ // Fuse literal generation operations.
+ switch (FirstOpcode) {
+ // PC relative address.
+ case AArch64::ADRP:
+ return SecondOpcode == AArch64::ADDXri ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // 32 bit immediate.
+ case AArch64::MOVZWi:
+ return (SecondOpcode == AArch64::MOVKWi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower half of 64 bit immediate.
+ case AArch64::MOVZXi:
+ return (SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Upper half of 64 bit immediate.
+ case AArch64::MOVKXi:
+ return FirstMI->getOperand(3).getImm() == 32 &&
+ ((SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 48) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ }
+
+ return false;
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
+ const MachineInstr *AnchorMI = AnchorSU.getInstr();
+ if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
+ return false;
+
+ // If the anchor instr is the ExitSU, then consider its predecessors;
+ // otherwise, its successors.
+ bool Preds = (&AnchorSU == &DAG->ExitSU);
+ SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
+
+ const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
+ const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorDeps) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ // Ignore the ExitSU if the dependents are successors.
+ if (!Preds && &DepSU == &DAG->ExitSU)
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
+ continue;
+
+ FirstMI = Preds ? DepMI : AnchorMI;
+ SecondMI = Preds ? AnchorMI : DepMI;
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ continue;
+
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ SUnit &FirstSU = Preds ? DepSU : AnchorSU;
+ SUnit &SecondSU = Preds ? AnchorSU : DepSU;
+ DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors/successors.
+ for (SDep &IDep : AnchorDeps)
+ if (IDep.getSUnit() == &DepSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors/predecessors.
+ for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
+ if (IDep.getSUnit() == &AnchorSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), DAG); dbgs() << " / ";
+ dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
+ DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+
+ ++NumFused;
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class AArch64MacroFusion : public ScheduleDAGMutation {
+public:
+ AArch64MacroFusion() {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ // For each of the SUnits in the scheduling block, try to fuse the instr in it
+ // with one in its successors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(DAG, ISU);
+
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(DAG, DAG->ExitSU);
+}
+
+} // end namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation() {
+ return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.h b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.h
new file mode 100644
index 000000000000..e5efedd9fbfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.h
@@ -0,0 +1,29 @@
+//===- AArch64MacroFusion.h - AArch64 Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 definition of the DAG scheduling mutation
+// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+//===----------------------------------------------------------------------===//
+// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createAArch64MacroFusionDAGMutation());
+/// to AArch64PassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation();
+
+} // end namespace llvm
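The header's note above points at where this hook is meant to be wired in. A
minimal sketch of that registration, assuming the usual createGenericSchedLive()
plumbing inside AArch64PassConfig::createMachineScheduler() in
AArch64TargetMachine.cpp (the surrounding code is not part of this patch):

  ScheduleDAGInstrs *
  AArch64PassConfig::createMachineScheduler(MachineSchedContext *C) const {
    // Build the default machine-instruction scheduler for this function.
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    // Register the macro-fusion mutation; its apply() adds the cluster edges
    // computed by scheduleAdjacentImpl() so fusable pairs stay back to back.
    DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }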
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 8f45e6a80a36..f3c8e7e9bdc2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -12,13 +12,14 @@
// CBZW %W0, <BB#2>
// BB#2:
// %W0 = COPY %WZR
-// This pass should be run after register allocation.
+// Similarly, this pass also handles non-zero copies.
+// BB#0:
+// cmp x0, #1
+// b.eq .LBB0_1
+// .LBB0_1:
+// orr x0, xzr, #0x1
//
-// FIXME: This should be extended to handle any constant other than zero. E.g.,
-// cmp w0, #1
-// b.eq .BB1
-// BB1:
-// mov w0, #1
+// This pass should be run after register allocation.
//
// FIXME: This could also be extended to check the whole dominance subtree below
// the comparison if the compile time regression is acceptable.
@@ -26,6 +27,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -43,6 +45,7 @@ namespace {
class AArch64RedundantCopyElimination : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
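+  // Scratch set of registers clobbered while scanning a block; sized once per
+  // function in runOnMachineFunction and reset before each backward scan.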
+ BitVector ClobberedRegs;
public:
static char ID;
@@ -50,6 +53,16 @@ public:
initializeAArch64RedundantCopyEliminationPass(
*PassRegistry::getPassRegistry());
}
+
+ struct RegImm {
+ MCPhysReg Reg;
+ int32_t Imm;
+ RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
+ };
+
+ Optional<RegImm> knownRegValInBlock(MachineInstr &CondBr,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse);
bool optimizeCopy(MachineBasicBlock *MBB);
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -66,18 +79,120 @@ char AArch64RedundantCopyElimination::ID = 0;
INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
"AArch64 redundant copy elimination pass", false, false)
-static bool guaranteesZeroRegInBlock(MachineInstr &MI, MachineBasicBlock *MBB) {
- unsigned Opc = MI.getOpcode();
+/// Remember what registers the specified instruction modifies.
+static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ ClobberedRegs.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!MO.isDef())
+ continue;
+
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ClobberedRegs.set(*AI);
+ }
+}
+
+/// It's possible to determine the value of a register based on a dominating
+/// condition. To do so, this function checks to see if the basic block \p MBB
+/// is the target to which a conditional branch \p CondBr jumps, where the
+/// comparison feeding that branch is an equality check against a constant.
+/// If so, return the known physical register and constant value pair.
+/// Otherwise, return None.
+Optional<AArch64RedundantCopyElimination::RegImm>
+AArch64RedundantCopyElimination::knownRegValInBlock(
+ MachineInstr &CondBr, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse) {
+ unsigned Opc = CondBr.getOpcode();
+
// Check if the current basic block is the target block to which the
// CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
- if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
- MBB == MI.getOperand(1).getMBB())
- return true;
- else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
- MBB != MI.getOperand(1).getMBB())
- return true;
-
- return false;
+ if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
+ MBB == CondBr.getOperand(1).getMBB()) ||
+ ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
+ MBB != CondBr.getOperand(1).getMBB())) {
+ FirstUse = CondBr;
+ return RegImm(CondBr.getOperand(0).getReg(), 0);
+ }
+
+ // Otherwise, must be a conditional branch.
+ if (Opc != AArch64::Bcc)
+ return None;
+
+ // Must be an equality check (i.e., == or !=).
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
+ if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+ return None;
+
+ MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
+ if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
+ (CC == AArch64CC::NE && BrTarget == MBB))
+ return None;
+
+ // Stop if we get to the beginning of PredMBB.
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ assert(PredMBB == CondBr.getParent() &&
+ "Conditional branch not in predecessor block!");
+ if (CondBr == PredMBB->begin())
+ return None;
+
+  // Registers clobbered in PredMBB between the CondBr instruction and the
+  // instruction currently being checked in the loop below.
+ ClobberedRegs.reset();
+
+ // Find compare instruction that sets NZCV used by CondBr.
+ MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
+ for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
+
+ // Track clobbered registers.
+ trackRegDefs(PredI, ClobberedRegs, TRI);
+
+ bool IsCMN = false;
+ switch (PredI.getOpcode()) {
+ default:
+ break;
+
+ // CMN is an alias for ADDS with a dead destination register.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ IsCMN = true;
+ // CMP is an alias for SUBS with a dead destination register.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri: {
+ MCPhysReg SrcReg = PredI.getOperand(1).getReg();
+
+ // Must not be a symbolic immediate.
+ if (!PredI.getOperand(2).isImm())
+ return None;
+
+ // The src register must not be modified between the cmp and conditional
+ // branch. This includes a self-clobbering compare.
+ if (ClobberedRegs[SrcReg])
+ return None;
+
+ // We've found the Cmp that sets NZCV.
+ int32_t KnownImm = PredI.getOperand(2).getImm();
+ int32_t Shift = PredI.getOperand(3).getImm();
+ KnownImm <<= Shift;
+ if (IsCMN)
+ KnownImm = -KnownImm;
+ FirstUse = PredI;
+ return RegImm(SrcReg, KnownImm);
+ }
+ }
+
+ // Bail if we see an instruction that defines NZCV that we don't handle.
+ if (PredI.definesRegister(AArch64::NZCV))
+ return None;
+ }
+ return None;
}
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
@@ -85,79 +200,187 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1)
return false;
+ // Check if the predecessor has two successors, implying the block ends in a
+ // conditional branch.
MachineBasicBlock *PredMBB = *MBB->pred_begin();
- MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
- if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
+ if (PredMBB->succ_size() != 2)
+ return false;
+
+ MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr();
+ if (CondBr == PredMBB->end())
return false;
- ++CompBr;
+ // Keep track of the earliest point in the PredMBB block where kill markers
+ // need to be removed if a COPY is removed.
+ MachineBasicBlock::iterator FirstUse;
+ // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
+ // or a compare (i.e., SUBS). In the latter case, we must take care when
+ // updating FirstUse when scanning for COPY instructions. In particular, if
+ // there's a COPY in between the compare and branch the COPY should not
+ // update FirstUse.
+ bool SeenFirstUse = false;
+ // Registers that contain a known value at the start of MBB.
+ SmallVector<RegImm, 4> KnownRegs;
+
+ MachineBasicBlock::iterator Itr = std::next(CondBr);
do {
- --CompBr;
- if (guaranteesZeroRegInBlock(*CompBr, MBB))
- break;
- } while (CompBr != PredMBB->begin() && CompBr->isTerminator());
+ --Itr;
- // We've not found a CBZ/CBNZ, time to bail out.
- if (!guaranteesZeroRegInBlock(*CompBr, MBB))
- return false;
+ Optional<RegImm> KnownRegImm = knownRegValInBlock(*Itr, MBB, FirstUse);
+ if (KnownRegImm == None)
+ continue;
- unsigned TargetReg = CompBr->getOperand(0).getReg();
- if (!TargetReg)
- return false;
- assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
- "Expect physical register");
+ KnownRegs.push_back(*KnownRegImm);
+
+ // Reset the clobber list, which is used by knownRegValInBlock.
+ ClobberedRegs.reset();
+
+ // Look backward in PredMBB for COPYs from the known reg to find other
+ // registers that are known to be a constant value.
+ for (auto PredI = Itr;; --PredI) {
+ if (FirstUse == PredI)
+ SeenFirstUse = true;
+
+ if (PredI->isCopy()) {
+ MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
+ MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
+ for (auto &KnownReg : KnownRegs) {
+ if (ClobberedRegs[KnownReg.Reg])
+ continue;
+          // If we have X = COPY Y, and Y is known to hold a constant, then X
+          // now holds the same constant.
+ if (CopySrcReg == KnownReg.Reg && !ClobberedRegs[CopyDstReg]) {
+ KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+          // If we have X = COPY Y, and X is known to hold a constant, then Y
+          // is known to hold the same constant.
+ if (CopyDstReg == KnownReg.Reg && !ClobberedRegs[CopySrcReg]) {
+ KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+ }
+ }
+
+ // Stop if we get to the beginning of PredMBB.
+ if (PredI == PredMBB->begin())
+ break;
+
+ trackRegDefs(*PredI, ClobberedRegs, TRI);
+      // Stop if all of the known regs have been clobbered.
+ if (all_of(KnownRegs, [&](RegImm KnownReg) {
+ return ClobberedRegs[KnownReg.Reg];
+ }))
+ break;
+ }
+ break;
+
+ } while (Itr != PredMBB->begin() && Itr->isTerminator());
- // Remember all registers aliasing with TargetReg.
- SmallSetVector<unsigned, 8> TargetRegs;
- for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
- TargetRegs.insert(*AI);
+  // We've not found a register with a known value, time to bail out.
+ if (KnownRegs.empty())
+ return false;
bool Changed = false;
+ // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
+ SmallSetVector<unsigned, 4> UsedKnownRegs;
MachineBasicBlock::iterator LastChange = MBB->begin();
- unsigned SmallestDef = TargetReg;
- // Remove redundant Copy instructions unless TargetReg is modified.
+ // Remove redundant Copy instructions unless KnownReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
++I;
- if (MI->isCopy() && MI->getOperand(0).isReg() &&
- MI->getOperand(1).isReg()) {
-
- unsigned DefReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
-
- if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
- !MRI->isReserved(DefReg) &&
- (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
- DEBUG(dbgs() << "Remove redundant Copy : ");
- DEBUG((MI)->print(dbgs()));
-
- MI->eraseFromParent();
- Changed = true;
- LastChange = I;
- NumCopiesRemoved++;
- SmallestDef =
- TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
- continue;
+ bool RemovedMI = false;
+ bool IsCopy = MI->isCopy();
+ bool IsMoveImm = MI->isMoveImmediate();
+ if (IsCopy || IsMoveImm) {
+ MCPhysReg DefReg = MI->getOperand(0).getReg();
+ MCPhysReg SrcReg = IsCopy ? MI->getOperand(1).getReg() : 0;
+ int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
+ if (!MRI->isReserved(DefReg) &&
+ ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
+ IsMoveImm)) {
+ for (RegImm &KnownReg : KnownRegs) {
+ if (KnownReg.Reg != DefReg &&
+ !TRI->isSuperRegister(DefReg, KnownReg.Reg))
+ continue;
+
+ // For a copy, the known value must be a zero.
+ if (IsCopy && KnownReg.Imm != 0)
+ continue;
+
+ if (IsMoveImm) {
+ // For a move immediate, the known immediate must match the source
+ // immediate.
+ if (KnownReg.Imm != SrcImm)
+ continue;
+
+ // Don't remove a move immediate that implicitly defines the upper
+ // bits when only the lower 32 bits are known.
+ MCPhysReg CmpReg = KnownReg.Reg;
+ if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) {
+ return !O.isDead() && O.isReg() && O.isDef() &&
+ O.getReg() != CmpReg;
+ }))
+ continue;
+ }
+
+ if (IsCopy)
+ DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
+ else
+ DEBUG(dbgs() << "Remove redundant Move : " << *MI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ LastChange = I;
+ NumCopiesRemoved++;
+ UsedKnownRegs.insert(KnownReg.Reg);
+ RemovedMI = true;
+ break;
+ }
}
}
- if (MI->modifiesRegister(TargetReg, TRI))
+ // Skip to the next instruction if we removed the COPY/MovImm.
+ if (RemovedMI)
+ continue;
+
+    // Remove any regs the MI clobbers from the KnownRegs set.
+ for (unsigned RI = 0; RI < KnownRegs.size();)
+ if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) {
+ std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]);
+ KnownRegs.pop_back();
+ // Don't increment RI since we need to now check the swapped-in
+ // KnownRegs[RI].
+ } else {
+ ++RI;
+ }
+
+ // Continue until the KnownRegs set is empty.
+ if (KnownRegs.empty())
break;
}
if (!Changed)
return false;
- // Otherwise, we have to fixup the use-def chain, starting with the
- // CBZ/CBNZ. Conservatively mark as much as we can live.
- CompBr->clearRegisterKills(SmallestDef, TRI);
+ // Add newly used regs to the block's live-in list if they aren't there
+ // already.
+ for (MCPhysReg KnownReg : UsedKnownRegs)
+ if (!MBB->isLiveIn(KnownReg))
+ MBB->addLiveIn(KnownReg);
- if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
- MBB->addLiveIn(TargetReg);
-
- // Clear any kills of TargetReg between CompBr and the last removed COPY.
+ // Clear kills in the range where changes were made. This is conservative,
+ // but should be okay since kill markers are being phased out.
+ DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
+ << "\tLastChange: " << *LastChange);
+ for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
+ MMI.clearKillInfo();
for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
- MMI.clearRegisterKills(SmallestDef, TRI);
+ MMI.clearKillInfo();
return true;
}
@@ -168,6 +391,11 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
return false;
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+
+ // Resize the clobber register bitfield tracker. We do this once per
+ // function and then clear the bitfield each time we optimize a copy.
+ ClobberedRegs.resize(TRI->getNumRegs());
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= optimizeCopy(&MBB);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index b292c9c87dcd..20a5979f9b4b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -1,4 +1,4 @@
-//===- AArch64RegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,24 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
-#include "AArch64InstrInfo.h" // For XXXRegClassID.
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+
+#define GET_TARGET_REGBANK_IMPL
+#include "AArch64GenRegisterBank.inc"
// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"
@@ -31,7 +42,7 @@ using namespace llvm;
#endif
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(AArch64::RegBanks, AArch64::NumRegisterBanks) {
+ : AArch64GenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -78,44 +89,21 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
// Check that the TableGen'ed-like file is in sync with our expectations.
// First, the Idx.
- assert(AArch64::PartialMappingIdx::PMI_GPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstGPR &&
- "GPR32 index not first in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_GPR64 ==
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR64 index not last in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstGPR <=
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstFPR &&
- "FPR32 index not first in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FPR512 ==
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR512 index not last in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstFPR <=
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR64 &&
- AArch64::PartialMappingIdx::PMI_FPR64 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR128 &&
- AArch64::PartialMappingIdx::PMI_FPR128 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR256 &&
- AArch64::PartialMappingIdx::PMI_FPR256 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR512 &&
- "FPR indices not properly ordered");
+ assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
+ {PMI_GPR32, PMI_GPR64}) &&
+ "PartialMappingIdx's are incorrectly ordered");
+ assert(checkPartialMappingIdx(
+ PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) &&
+ "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
do { \
- const PartialMapping &Map = \
- AArch64::PartMappings[AArch64::PartialMappingIdx::Idx - \
- AArch64::PartialMappingIdx::PMI_Min]; \
- (void)Map; \
- assert(Map.StartIdx == ValStartIdx && Map.Length == ValLength && \
- Map.RegBank == &RB && #Idx " is incorrectly initialized"); \
- } while (0)
+ assert( \
+ checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
+ #Idx " is incorrectly initialized"); \
+ } while (false)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
@@ -128,17 +116,11 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
do { \
- unsigned PartialMapBaseIdx = \
- AArch64::PartialMappingIdx::PMI_##RBName##Size - \
- AArch64::PartialMappingIdx::PMI_Min; \
- (void)PartialMapBaseIdx; \
- const ValueMapping &Map = AArch64::getValueMapping( \
- AArch64::PartialMappingIdx::PMI_First##RBName, Size)[Offset]; \
- (void)Map; \
- assert(Map.BreakDown == &AArch64::PartMappings[PartialMapBaseIdx] && \
- Map.NumBreakDowns == 1 && #RBName #Size \
- " " #Offset " is incorrectly initialized"); \
- } while (0)
+ assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
+ PartialMappingIdx::PMI_First##RBName, Size, \
+ Offset) && \
+ #RBName #Size " " #Offset " is incorrectly initialized"); \
+ } while (false)
#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
@@ -157,7 +139,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
- } while (0)
+ } while (false)
CHECK_VALUEMAP_3OPS(GPR, 32);
CHECK_VALUEMAP_3OPS(GPR, 64);
@@ -169,24 +151,23 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
do { \
- unsigned PartialMapDstIdx = \
- AArch64::PMI_##RBNameDst##Size - AArch64::PMI_Min; \
- unsigned PartialMapSrcIdx = \
- AArch64::PMI_##RBNameSrc##Size - AArch64::PMI_Min; \
- (void) PartialMapDstIdx; \
- (void) PartialMapSrcIdx; \
- const ValueMapping *Map = AArch64::getCopyMapping( \
- AArch64::PMI_First##RBNameDst == AArch64::PMI_FirstGPR, \
- AArch64::PMI_First##RBNameSrc == AArch64::PMI_FirstGPR, Size); \
- (void) Map; \
- assert(Map[0].BreakDown == &AArch64::PartMappings[PartialMapDstIdx] && \
+ unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
+ unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
+ (void)PartialMapDstIdx; \
+ (void)PartialMapSrcIdx; \
+ const ValueMapping *Map = getCopyMapping( \
+ AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \
+ (void)Map; \
+ assert(Map[0].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
" Dst is incorrectly initialized"); \
- assert(Map[1].BreakDown == &AArch64::PartMappings[PartialMapSrcIdx] && \
+ assert(Map[1].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
" Src is incorrectly initialized"); \
\
- } while (0)
+ } while (false)
CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
@@ -280,12 +261,10 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
break;
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
- /*ID*/ 1, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
+ /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
/*NumOperands*/ 3);
InstructionMapping FPRMapping(
- /*ID*/ 2, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
+ /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
/*NumOperands*/ 3);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -305,21 +284,21 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping GPRToFPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRToGPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -341,17 +320,15 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -369,13 +346,12 @@ void AArch64RegisterBankInfo::applyMappingImpl(
switch (OpdMapper.getMI().getOpcode()) {
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
// Those ID must match getInstrAlternativeMappings.
assert((OpdMapper.getInstrMapping().getID() >= 1 &&
OpdMapper.getInstrMapping().getID() <= 4) &&
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
- }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -411,6 +387,8 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+ PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
+
#ifndef NDEBUG
// Make sure all the operands are using similar size and type.
// Should probably be checked by the machine verifier.
@@ -422,20 +400,19 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
// for each types.
for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
- assert(AArch64::getRegBankBaseIdxOffset(OpTy.getSizeInBits()) ==
- AArch64::getRegBankBaseIdxOffset(Size) &&
- "Operand has incompatible size");
+ assert(
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
+ RBIdx, OpTy.getSizeInBits()) ==
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
+ "Operand has incompatible size");
bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
(void)OpIsFPR;
assert(IsFPR == OpIsFPR && "Operand has incompatible type");
}
#endif // End NDEBUG.
- AArch64::PartialMappingIdx RBIdx =
- IsFPR ? AArch64::PMI_FirstFPR : AArch64::PMI_FirstGPR;
-
- return InstructionMapping{DefaultMappingID, 1,
- AArch64::getValueMapping(RBIdx, Size), NumOperands};
+ return InstructionMapping{DefaultMappingID, 1, getValueMapping(RBIdx, Size),
+ NumOperands};
}
RegisterBankInfo::InstructionMapping
@@ -485,9 +462,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
const RegisterBank &SrcRB =
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
- return InstructionMapping{DefaultMappingID, copyCost(DstRB, SrcRB, Size),
- AArch64::getCopyMapping(DstIsGPR, SrcIsGPR, Size),
- /*NumOperands*/ 2};
+ return InstructionMapping{
+ DefaultMappingID, copyCost(DstRB, SrcRB, Size),
+ getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
+ /*NumOperands*/ 2};
}
case TargetOpcode::G_SEQUENCE:
// FIXME: support this, but the generic code is really not going to do
@@ -501,7 +479,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Track the size and bank of each register. We don't do partial mappings.
SmallVector<unsigned, 4> OpSize(NumOperands);
- SmallVector<AArch64::PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
if (!MO.isReg())
@@ -513,9 +491,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
// For floating-point instructions, scalars go in FPRs.
if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
- OpRegBankIdx[Idx] = AArch64::PMI_FirstFPR;
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
else
- OpRegBankIdx[Idx] = AArch64::PMI_FirstGPR;
+ OpRegBankIdx[Idx] = PMI_FirstGPR;
}
unsigned Cost = 1;
@@ -523,49 +501,50 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// fine-tune the computed mapping.
switch (Opc) {
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
- OpRegBankIdx = {AArch64::PMI_FirstFPR, AArch64::PMI_FirstGPR};
+ case TargetOpcode::G_UITOFP:
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
- }
case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR, AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FPTOUI:
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_FCMP: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR,
- /* Predicate */ AArch64::PMI_None, AArch64::PMI_FirstFPR,
- AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FCMP:
+ OpRegBankIdx = {PMI_FirstGPR,
+ /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_BITCAST: {
+ case TargetOpcode::G_BITCAST:
// This is going to be a cross register bank copy and this is expensive.
if (OpRegBankIdx[0] != OpRegBankIdx[1])
- Cost =
- copyCost(*AArch64::PartMappings[OpRegBankIdx[0]].RegBank,
- *AArch64::PartMappings[OpRegBankIdx[1]].RegBank, OpSize[0]);
+ Cost = copyCost(
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
+ OpSize[0]);
break;
- }
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
// Loading in vector unit is slightly more expensive.
// This is actually only true for the LD1R and co instructions,
// but anyway for the fast mode this number does not matter and
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] >= AArch64::PMI_FirstFPR)
+ if (OpRegBankIdx[0] >= PMI_FirstFPR)
Cost = 2;
- }
+ break;
}
// Finally construct the computed mapping.
RegisterBankInfo::InstructionMapping Mapping =
InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
- for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
- if (MI.getOperand(Idx).isReg())
- OpdsMapping[Idx] =
- AArch64::getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (MI.getOperand(Idx).isReg()) {
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ if (!Mapping->isValid())
+ return InstructionMapping();
+
+ OpdsMapping[Idx] = Mapping;
+ }
+ }
Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
return Mapping;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index f763235049d4..0a795a42c0b1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -16,25 +16,78 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "AArch64GenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace AArch64 {
-enum {
- GPRRegBankID = 0, /// General Purpose Registers: W, X.
- FPRRegBankID = 1, /// Floating Point/Vector Registers: B, H, S, D, Q.
- CCRRegBankID = 2, /// Conditional register: NZCV.
- NumRegisterBanks
-};
+class AArch64GenRegisterBankInfo : public RegisterBankInfo {
+protected:
+
+ enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_FPR32 = 1,
+ PMI_FPR64,
+ PMI_FPR128,
+ PMI_FPR256,
+ PMI_FPR512,
+ PMI_GPR32,
+ PMI_GPR64,
+ PMI_FirstGPR = PMI_GPR32,
+ PMI_LastGPR = PMI_GPR64,
+ PMI_FirstFPR = PMI_FPR32,
+ PMI_LastFPR = PMI_FPR512,
+ PMI_Min = PMI_FirstFPR,
+ };
+
+ static RegisterBankInfo::PartialMapping PartMappings[];
+ static RegisterBankInfo::ValueMapping ValMappings[];
+ static PartialMappingIdx BankIDToCopyMapIdx[];
+
+ enum ValueMappingIdx {
+ InvalidIdx = 0,
+ First3OpsIdx = 1,
+ Last3OpsIdx = 19,
+ DistanceBetweenRegBanks = 3,
+ FirstCrossRegCpyIdx = 22,
+ LastCrossRegCpyIdx = 34,
+ DistanceBetweenCrossRegCpy = 2
+ };
+
+ static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
+ unsigned ValLength, const RegisterBank &RB);
+ static bool checkValueMapImpl(unsigned Idx, unsigned FirstInBank,
+ unsigned Size, unsigned Offset);
+ static bool checkPartialMappingIdx(PartialMappingIdx FirstAlias,
+ PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order);
-extern RegisterBank GPRRegBank;
-extern RegisterBank FPRRegBank;
-extern RegisterBank CCRRegBank;
-} // End AArch64 namespace.
+ static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping representing the RegisterBank
+ /// at \p RBIdx with a size of \p Size.
+ ///
+ /// The returned mapping works for instructions with the same kind of
+ /// operands for up to 3 operands.
+ ///
+ /// \pre \p RBIdx != PartialMappingIdx::None
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping of the operands of a copy
+ /// instruction from the \p SrcBankID register bank to the \p DstBankID
+ /// register bank with a size of \p Size.
+ static const RegisterBankInfo::ValueMapping *
+ getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
+
+#define GET_TARGET_REGBANK_CLASS
+#include "AArch64GenRegisterBank.inc"
+};
/// This class provides the information for the target register banks.
-class AArch64RegisterBankInfo final : public RegisterBankInfo {
+class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
new file mode 100644
index 000000000000..c2b6c0b04e9b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -0,0 +1,20 @@
+//=- AArch64RegisterBanks.td - Describe the AArch64 Banks ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: W, X.
+def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
+
+/// Floating Point/Vector Registers: B, H, S, D, Q.
+def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+
+/// Conditional register: NZCV.
+def CCRRegBank : RegisterBank<"CCR", [CCR]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 98fad71aa18a..baf15ac540cf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -118,25 +118,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- markSuperRegs(Reserved, AArch64::SP);
- markSuperRegs(Reserved, AArch64::XZR);
markSuperRegs(Reserved, AArch64::WSP);
markSuperRegs(Reserved, AArch64::WZR);
- if (TFI->hasFP(MF) || TT.isOSDarwin()) {
- markSuperRegs(Reserved, AArch64::FP);
+ if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- }
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
- markSuperRegs(Reserved, AArch64::X18); // Platform register
- markSuperRegs(Reserved, AArch64::W18);
- }
+ if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ markSuperRegs(Reserved, AArch64::W18); // Platform register
- if (hasBasePointer(MF)) {
- markSuperRegs(Reserved, AArch64::X19);
+ if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
- }
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
index 93ca079275c8..18d000ace94c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -13,7 +13,7 @@
// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedModel.h for details.
+// This works with MachineScheduler. See MCSchedule.h for details.
// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
def CortexA53Model : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 99c48d0146e4..303398ea0b7f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -162,7 +162,9 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
-def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
+def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 19a6d6f2a1ad..eec089087fe0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -17,10 +17,112 @@
// instruction cost model.
def FalkorModel : SchedMachineModel {
- let IssueWidth = 4; // 4-wide issue for expanded uops.
+ let IssueWidth = 8; // 8 uops are dispatched per cycle.
let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
let LoopMicroOpBufferSize = 16;
let LoadLatency = 3; // Optimistic load latency.
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
- let CompleteModel = 0;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Falkor.
+
+let SchedModel = FalkorModel in {
+
+ def FalkorUnitB : ProcResource<1>; // Branch
+ def FalkorUnitLD : ProcResource<1>; // Load pipe
+ def FalkorUnitSD : ProcResource<1>; // Store data
+ def FalkorUnitST : ProcResource<1>; // Store pipe
+ def FalkorUnitX : ProcResource<1>; // Complex arithmetic
+ def FalkorUnitY : ProcResource<1>; // Simple arithmetic
+ def FalkorUnitZ : ProcResource<1>; // Simple arithmetic
+
+ def FalkorUnitVSD : ProcResource<1>; // Vector store data
+ def FalkorUnitVX : ProcResource<1>; // Vector X-pipe
+ def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe
+
+ def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector
+ def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar
+
+ // Define the resource groups.
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>;
+ def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>;
+ def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ,
+ FalkorUnitB]>;
+ def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>;
+ def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Falkor.
+
+let SchedModel = FalkorModel in {
+
+def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 1; let NumMicroOps = 2; }
+def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 8; let NumMicroOps = 2; }
+def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 16; let NumMicroOps = 2; }
+def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
+def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteST, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 3; let NumMicroOps = 3; }
+def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 5; }
+def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
+def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 4; let NumMicroOps = 3; }
+def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 3; let NumMicroOps = 2; }
+def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
+def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 6; let NumMicroOps = 2; }
+def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
+def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
+def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 3; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// No forwarding logic is modelled yet.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// Detailed Refinements
+// -----------------------------------------------------------------------------
+include "AArch64SchedFalkorDetails.td"
+
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
new file mode 100644
index 000000000000..6bce4ef6b652
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -0,0 +1,523 @@
+//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the uop and latency details for the machine model for the
+// Qualcomm Falkor subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64SchedFalkorWriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the modeled is refined, this will override most
+// of the earlier mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// SIMD Floating-point Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
+
+def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_2VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v2i64_indexed$")>;
+// SIMD Integer Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
+
+def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)(i16|i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)v.*$")>;
+// SIMD Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD], (instrs LD2i64)>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
+
+// SIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
+
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
+
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
+
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
+
+// SIMD Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
+def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>;
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>;
+
+def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
+
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>;
+def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>;
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+
+// Other Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>;
+
+def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
+
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
+def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
+def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
+
+def : InstRW<[WriteST], (instregex "^LDC.*$")>;
+def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>;
+def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
+def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
+def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
+
+def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
new file mode 100644
index 000000000000..9cdb4be4246b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
@@ -0,0 +1,361 @@
+//=- AArch64SchedFalkorWrRes.td - Falkor Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Falkor specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: FalkorWr
+// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
+// Latency: #cyc
+//
+// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
+// down one Z pipe, six SD pipes, and four VX pipes, and the total latency is
+// six cycles.
+//
+// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
+//
+// Contains all of the Falkor specific WriteVariant types for immediate zero
+// and LSLFast.
+//===----------------------------------------------------------------------===//
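As an illustration of this convention (a sketch only, not a def that appears in the patch), the multi-pipe example name cited in the header comment would be spelled out over the corresponding Falkor unit resources roughly as follows:

    // Sketch: one Z micro-op, six SD micro-ops, four VX micro-ops, 6-cycle latency.
    def FalkorWr_1Z_6SD_4VX_6cyc : SchedWriteRes<[FalkorUnitZ,
                                                  FalkorUnitSD, FalkorUnitSD,
                                                  FalkorUnitSD, FalkorUnitSD,
                                                  FalkorUnitSD, FalkorUnitSD,
                                                  FalkorUnitVX, FalkorUnitVX,
                                                  FalkorUnitVX, FalkorUnitVX]> {
      let Latency     = 6;
      let NumMicroOps = 11;
    }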
+
+//===----------------------------------------------------------------------===//
+// Define 1 micro-op types
+
+
+def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
+def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
+def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
+def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
+def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
+def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
+def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
+def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
+def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
+def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
+def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
+def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+
+def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
+def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
+def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define 2 micro-op types
+
+def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 8];
+}
+
+def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 16;
+ let ResourceCycles = [2, 16];
+}
+
+def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 3 micro-op types
+
+def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 4 micro-op types
+
+def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 5 micro-op types
+
+def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 6 micro-op types
+
+def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 8 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 9 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitXYZ,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitXYZ,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+// Forwarding logic is modeled for vector multiply and accumulate
+// -----------------------------------------------------------------------------
+def FalkorReadVMA : SchedReadAdvance<2, [FalkorWr_1VXVY_4cyc,
+ FalkorWr_2VXVY_4cyc]>;
+def FalkorReadFMA : SchedReadAdvance<3, [FalkorWr_1VXVY_5cyc,
+ FalkorWr_1VXVY_6cyc,
+ FalkorWr_2VXVY_5cyc,
+ FalkorWr_2VXVY_6cyc]>;
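These ReadAdvance types are consumed alongside the matching write in the mappings earlier in this patch, for example:

    def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;

With SchedReadAdvance<3>, an FMADD/FMSUB whose input is produced by one of the listed 5- or 6-cycle FMA writes reads that operand three cycles early, so chained FMAs see an effective latency of roughly 2 cycles for the single-precision forms and 3 cycles for double precision; FalkorReadVMA models the same kind of forwarding for the 4-cycle vector multiply-accumulate writes.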
+
+// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
+// -----------------------------------------------------------------------------
+def FalkorImmZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
+def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
+
+def FalkorWr_FMOV : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
+
+def FalkorWr_MOVZ : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+
+def FalkorWr_LDR : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
+
+def FalkorWr_ADD : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<FalkorImmZPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
+
+def FalkorWr_PRFM : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
+
+def FalkorWr_LDRS : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
index 426ae6103e4b..02cccccd3078 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -776,23 +776,29 @@ def KryoWrite_4cyc_X_X_115ln :
}
def : InstRW<[KryoWrite_4cyc_X_X_115ln],
(instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>;
-def KryoWrite_1cyc_XA_Y_noRSV_43ln :
+def KryoWrite_10cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 10; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_43ln],
- (instrs FDIVDrr, FDIVSrr)>;
-def KryoWrite_1cyc_XA_Y_noRSV_121ln :
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVSrr)>;
+def KryoWrite_14cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 14; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_121ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVDrr)>;
+def KryoWrite_10cyc_XA_Y_noRSV_121ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 10; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln],
(instrs FDIVv2f32)>;
-def KryoWrite_1cyc_XA_Y_XA_Y_123ln :
+def KryoWrite_14cyc_XA_Y_XA_Y_123ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 14; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_123ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln],
(instrs FDIVv2f64, FDIVv4f32)>;
def KryoWrite_5cyc_X_noRSV_55ln :
SchedWriteRes<[KryoUnitX]> {
@@ -968,24 +974,36 @@ def KryoWrite_2cyc_XY_XY_109ln :
}
def : InstRW<[KryoWrite_2cyc_XY_XY_109ln],
(instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>;
-def KryoWrite_1cyc_XA_Y_noRSV_42ln :
+def KryoWrite_12cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 12; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_42ln],
- (instregex "FSQRT(S|D)r")>;
-def KryoWrite_1cyc_XA_Y_noRSV_120ln :
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTSr)>;
+def KryoWrite_21cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 21; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTDr)>;
+def KryoWrite_12cyc_XA_Y_noRSV_120ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 12; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln],
+ (instrs FSQRTv2f32)>;
+def KryoWrite_21cyc_XA_Y_XA_Y_122ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 21; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_120ln],
- (instregex "FSQRTv2f32")>;
-def KryoWrite_1cyc_XA_Y_XA_Y_122ln :
+def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv4f32)>;
+def KryoWrite_36cyc_XA_Y_XA_Y_122ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 36; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_122ln],
- (instregex "FSQRT(v2f64|v4f32)")>;
+def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv2f64)>;
def KryoWrite_1cyc_X_201ln :
SchedWriteRes<[KryoUnitX]> {
let Latency = 1; let NumMicroOps = 1;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
index 14d6891253fa..3fbbc0be682d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
@@ -366,7 +366,8 @@ def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;
+def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
new file mode 100644
index 000000000000..9a0cb702518d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -0,0 +1,352 @@
+//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM ThunderX T8X
+// (T88, T81, T83) processors.
+// Loosely based on Cortex-A53 which is somewhat similar.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Cavium ThunderX T8X scheduling machine model.
+def ThunderXT8XModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 8; // Branch mispredict penalty.
+ let PostRAScheduler = 1; // Use PostRA scheduler.
+ let CompleteModel = 1;
+}
+
+// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
+def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch
+def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types mapping the ProcResources and
+// latencies.
+
+let SchedModel = ThunderXT8XModel in {
+
+// ALU
+def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
+
+// MAC
+def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+// Div
+def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
+ let Latency = 12;
+ let ResourceCycles = [6];
+}
+
+def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
+ let Latency = 14;
+ let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
+
+// Vector Load
+def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
+ let Latency = 8;
+ let ResourceCycles = [3];
+}
+
+def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 6;
+ let ResourceCycles = [1];
+}
+
+def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [7];
+}
+
+def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 12;
+ let ResourceCycles = [8];
+}
+
+def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }
+
+// Vector Store
+def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
+def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
+
+def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 10;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [10];
+}
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [THXT8XUnitBr]>;
+def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
+def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
+def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
+def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
+def : WriteRes<WriteSys, [THXT8XUnitBr]>;
+def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
+def : WriteRes<WriteHint, [THXT8XUnitBr]>;
+
+// FP ALU
+def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
+
+def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 12;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 17;
+ let ResourceCycles = [14];
+}
+
+def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 31;
+ let ResourceCycles = [28];
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// No forwarding for these reads.
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 2>;
+def : ReadAdvance<ReadVLD, 2>;
+
+// FIXME: This needs more targeted benchmarking.
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to Extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, THXT8XReadISReg>;
+
+def THXT8XReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRW.
+
+//---
+// Branch
+//---
+def : InstRW<[THXT8XWriteBR], (instregex "^B")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^B.*")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR")>;
+
+//---
+// Ret
+//---
+def : InstRW<[THXT8XWriteRET], (instregex "^RET")>;
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 35a40c314bf4..3654eeca530a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -6,23 +6,23 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// 1. Introduction
//
-// This file defines the machine model for Broadcom Vulcan to support
-// instruction scheduling and other instruction cost heuristics.
+// This file defines the scheduling model for Cavium ThunderX2T99
+// processors.
+// Based on Broadcom Vulcan.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// 2. Pipeline Description.
-def VulcanModel : SchedMachineModel {
+def ThunderX2T99Model : SchedMachineModel {
let IssueWidth = 4; // 4 micro-ops dispatched at a time.
let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
// Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 32;
+ let LoopMicroOpBufferSize = 32;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
}
@@ -30,155 +30,155 @@ def VulcanModel : SchedMachineModel {
// Define the issue ports.
// Port 0: ALU, FP/SIMD.
-def VulcanP0 : ProcResource<1>;
+def THX2T99P0 : ProcResource<1>;
// Port 1: ALU, FP/SIMD, integer mul/div.
-def VulcanP1 : ProcResource<1>;
+def THX2T99P1 : ProcResource<1>;
// Port 2: ALU, Branch.
-def VulcanP2 : ProcResource<1>;
+def THX2T99P2 : ProcResource<1>;
// Port 3: Store data.
-def VulcanP3 : ProcResource<1>;
+def THX2T99P3 : ProcResource<1>;
// Port 4: Load/store.
-def VulcanP4 : ProcResource<1>;
+def THX2T99P4 : ProcResource<1>;
// Port 5: Load/store.
-def VulcanP5 : ProcResource<1>;
+def THX2T99P5 : ProcResource<1>;
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.
//
// NOTE: Some groups only contain one member. This is a way to create names for
// the various functional units that share a single issue port. For example,
-// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1.
+// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1.
// Integer divide and multiply micro-ops only on port 1.
-def VulcanI1 : ProcResGroup<[VulcanP1]>;
+def THX2T99I1 : ProcResGroup<[THX2T99P1]>;
// Branch micro-ops only on port 2.
-def VulcanI2 : ProcResGroup<[VulcanP2]>;
+def THX2T99I2 : ProcResGroup<[THX2T99P2]>;
// ALU micro-ops on ports 0, 1, and 2.
-def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>;
+def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>;
// Crypto FP/SIMD micro-ops only on port 1.
-def VulcanF1 : ProcResGroup<[VulcanP1]>;
+def THX2T99F1 : ProcResGroup<[THX2T99P1]>;
// FP/SIMD micro-ops on ports 0 and 1.
-def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>;
+def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>;
// Store data micro-ops only on port 3.
-def VulcanSD : ProcResGroup<[VulcanP3]>;
+def THX2T99SD : ProcResGroup<[THX2T99P3]>;
// Load/store micro-ops on ports 4 and 5.
-def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>;
+def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
-def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2,
- VulcanP3, VulcanP4, VulcanP5]> {
+def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
let BufferSize=60;
}
// Define commonly used write types for InstRW specializations.
-// All definitions follow the format: VulcanWrite_<NumCycles>Cyc_<Resources>.
+// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
// 4 cycles on I1.
-def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
// 1 cycle on I0, I1, or I2.
-def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
// 5 cycles on F1.
-def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
// 7 cycles on F1.
-def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
// 4 cycles on F0 or F1.
-def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
// 5 cycles on F0 or F1.
-def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
// 6 cycles on F0 or F1.
-def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
// 7 cycles on F0 or F1.
-def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
// 8 cycles on F0 or F1.
-def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
// 16 cycles on F0 or F1.
-def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
// 23 cycles on F0 or F1.
-def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
let ResourceCycles = [11];
}
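The 23-cycle write above also occupies the F0/F1 pipe for 11 cycles (ResourceCycles), so result latency and issue throughput are different quantities. A minimal standalone C++ sketch of that distinction, using only the two numbers from the definition above (an illustration, not part of the patch or of LLVM):

#include <cstdio>

int main() {
  const int Latency = 23;        // cycles until the divide result is ready
  const int ResourceCycles = 11; // cycles the FP pipe stays occupied
  // Two dependent divides: the second waits for the first result.
  int Dependent = Latency + Latency;          // 46
  // Two independent divides on the same pipe: the second issues once the
  // pipe frees up and then takes the full latency to complete.
  int Independent = ResourceCycles + Latency; // 34
  std::printf("dependent: %d, independent: %d\n", Dependent, Independent);
  return 0;
}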
// 1 cycle on LS0 or LS1.
-def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
// 4 cycles on LS0 or LS1.
-def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
// 5 cycles on LS0 or LS1.
-def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
// 6 cycles on LS0 or LS1.
-def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
-def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> {
+def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
let NumMicroOps = 2;
}
// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def VulcanWrite_6Cyc_LS01_I012_I012 :
- SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> {
+def THX2T99Write_6Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
}
// 1 cycle on LS0 or LS1 and F0 or F1.
-def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 1;
let NumMicroOps = 2;
}
// 5 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
let NumMicroOps = 2;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
let NumMicroOps = 2;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 2;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
let NumMicroOps = 2;
}
@@ -202,7 +202,7 @@ def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
//---
// 3.1 Branch Instructions
@@ -211,7 +211,7 @@ let SchedModel = VulcanModel in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
@@ -222,7 +222,7 @@ def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
//---
// 3.2 Arithmetic and Logical Instructions
@@ -233,25 +233,25 @@ def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
-def : WriteRes<WriteISReg, [VulcanI012]> {
+def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteIEReg, [VulcanI012]> {
+def : WriteRes<WriteIEReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
// Move immed
-def : WriteRes<WriteImm, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
// Variable shift
-def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
//---
// 3.4 Divide and Multiply Instructions
@@ -259,33 +259,33 @@ def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
// Divide, W-form
// Latency range of 13-23. Take the average.
-def : WriteRes<WriteID32, [VulcanI1]> {
+def : WriteRes<WriteID32, [THX2T99I1]> {
let Latency = 18;
let ResourceCycles = [18];
}
// Divide, X-form
// Latency range of 13-39. Take the average.
-def : WriteRes<WriteID64, [VulcanI1]> {
+def : WriteRes<WriteID64, [THX2T99I1]> {
let Latency = 26;
let ResourceCycles = [26];
}
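The "Take the average" comments above pick the midpoint of the documented latency range. A quick standalone C++ check of the two cases (illustration only):

#include <cstdio>

// Midpoint of an inclusive latency range, as used for WriteID32/WriteID64.
static int Midpoint(int Lo, int Hi) { return (Lo + Hi) / 2; }

int main() {
  std::printf("W-form divide: %d\n", Midpoint(13, 23)); // 18
  std::printf("X-form divide: %d\n", Midpoint(13, 39)); // 26
  return 0;
}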
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
// Bitfield move, basic
// Bitfield move, insert
// NOTE: Handled by WriteIS.
// Count leading
-def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
+def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
"^CLZ(W|X)r$")>;
// Reverse bits/bytes
@@ -300,13 +300,13 @@ def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [VulcanLS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -314,15 +314,15 @@ def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
-def VulcanWriteLDIdx : SchedWriteVariant<[
- SchedVar<ScaledIdxPred, [VulcanWrite_6Cyc_LS01_I012_I012]>,
- SchedVar<NoSchedPred, [VulcanWrite_5Cyc_LS01_I012]>]>;
-def : SchedAlias<WriteLDIdx, VulcanWriteLDIdx>;
+def THX2T99WriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [THX2T99Write_6Cyc_LS01_I012_I012]>,
+ SchedVar<NoSchedPred, [THX2T99Write_5Cyc_LS01_I012]>]>;
+def : SchedAlias<WriteLDIdx, THX2T99WriteLDIdx>;
-def VulcanReadAdrBase : SchedReadVariant<[
+def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, VulcanReadAdrBase>;
+def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
@@ -347,7 +347,7 @@ def : WriteRes<WriteLDHi, []> {
// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
-def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteST, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -364,14 +364,14 @@ def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
-def : WriteRes<WriteSTIdx, [VulcanLS01, VulcanSD, VulcanI012]> {
+def : WriteRes<WriteSTIdx, [THX2T99LS01, THX2T99SD, THX2T99I012]> {
let Latency = 1;
let NumMicroOps = 3;
}
// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
-def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -389,35 +389,35 @@ def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
// FP arithmetic
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
// FP divide, S-form
// FP square root, S-form
-def : WriteRes<WriteFDiv, [VulcanF01]> {
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [VulcanF01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
// FP round to integral
-def : InstRW<[VulcanWrite_7Cyc_F01],
+def : InstRW<[THX2T99Write_7Cyc_F01],
(instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
// FP select
-def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
+def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
//---
// 3.9 FP Miscellaneous Instructions
@@ -426,16 +426,16 @@ def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [VulcanF01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [VulcanF01]> { let Latency = 4; }
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
+def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
// 3.12 ASIMD Integer Instructions
@@ -470,39 +470,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
//---
// 3.13 ASIMD Floating-point Instructions
//---
// ASIMD FP absolute value
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith,pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
"^FCMGTv", "^FCMLEv",
"^FCMLTv")>;
@@ -513,42 +513,42 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// NOTE: Handled by WriteV.
// ASIMD FP divide, D-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
// ASIMD FP divide, Q-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
// ASIMD FP divide, Q-form, F64
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
"^FMINv", "^FMINNMv")>;
// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
"^FMINPv", "^FMINNMPv")>;
// ASIMD FP max/min, reduce
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
"^FMINVv", "^FMINNMVv")>;
// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
// ASIMD FP multiply accumulate, D-form, FZ
// ASIMD FP multiply accumulate, D-form, no FZ
// ASIMD FP multiply accumulate, Q-form, FZ
// ASIMD FP multiply accumulate, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
// ASIMD FP negate
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
// ASIMD FP round, D-form
// ASIMD FP round, Q-form
@@ -559,39 +559,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
//--
// ASIMD bit reverse
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
// ASIMD extract
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
// ASIMD extract narrow, saturating
// NOTE: Handled by WriteV.
// ASIMD insert, element to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD move, integer immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
"^FRSQRTEv", "^URSQRTEv")>;
@@ -599,31 +599,31 @@ def : InstRW<[VulcanWrite_5Cyc_F01],
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
-def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
+def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
// ASIMD transfer, element to word or word
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
// ASIMD transfer gen reg to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
"^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
// 3.15 ASIMD Load Instructions
@@ -631,114 +631,114 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,82 +747,82 @@ def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
//--
@@ -830,23 +830,23 @@ def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
//--
// Crypto AES ops
-def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
// Crypto SHA1 xor ops
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration op (1 u-op)
// Crypto SHA256 schedule acceleration op (2 u-ops)
// Crypto SHA256 hash acceleration ops
-def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
//--
// 3.18 CRC
//--
// CRC checksum ops
-def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>;
+def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-} // SchedModel = VulcanModel
+} // SchedModel = ThunderX2T99Model
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 66a8f332513a..7f5507371fa0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -42,10 +42,12 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
Entry.Node = Size;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ std::move(Args))
+ .setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
@@ -53,7 +55,5 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
}
bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
CodeGenOpt::Level OptLevel) const {
- if (OptLevel >= CodeGenOpt::Aggressive)
- return true;
- return false;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 03e01329e036..b3aba4781db8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -81,8 +81,22 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 11;
break;
- case Vulcan:
+ case ThunderX2T99:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
+ break;
+ case ThunderX:
+ case ThunderXT88:
+ case ThunderXT81:
+ case ThunderXT83:
+ CacheLineSize = 128;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
break;
case CortexA35: break;
case CortexA53: break;
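The alignment fields set above are log2 values in this LLVM revision (setPrefFunctionAlignment and related hooks took log2 quantities at the time), so the ThunderX settings translate to byte alignments as below; a standalone C++ sketch, with the log2 convention stated as an assumption:

#include <cstdio>

int main() {
  // Assumption: PrefFunctionAlignment/PrefLoopAlignment hold log2(bytes).
  const unsigned PrefFunctionAlignment = 3;
  const unsigned PrefLoopAlignment = 2;
  std::printf("function alignment: %u bytes\n", 1u << PrefFunctionAlignment); // 8
  std::printf("loop alignment: %u bytes\n", 1u << PrefLoopAlignment);         // 4
  return 0;
}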
@@ -133,9 +147,9 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;
- // The small code mode's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB).
- if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage())
+ // The small code model's direct accesses use ADRP, which cannot
+ // necessarily produce the value 0 (if the code is above 4GB).
+ if (useSmallAddressing() && GV->hasExternalWeakLinkage())
return AArch64II::MO_GOT;
return AArch64II::MO_NO_FLAG;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index a99340225082..40ad9185012c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -45,7 +45,11 @@ public:
ExynosM1,
Falkor,
Kryo,
- Vulcan
+ ThunderX2T99,
+ ThunderX,
+ ThunderXT81,
+ ThunderXT83,
+ ThunderXT88
};
protected:
@@ -61,9 +65,11 @@ protected:
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
+ bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
bool HasSPE = false;
+ bool HasLSLFast = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -73,6 +79,10 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
+
+  // NegativeImmediates - Transform instructions with negative immediates.
+ bool NegativeImmediates = true;
+
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -83,6 +93,8 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasFuseAES = false;
+ bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
@@ -183,6 +195,7 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
+ bool hasRDM() const { return HasRDM; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -195,6 +208,8 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
@@ -218,6 +233,7 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasSPE() const { return HasSPE; }
+ bool hasLSLFast() const { return HasLSLFast; }
bool isLittleEndian() const { return IsLittle; }
@@ -226,6 +242,7 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
@@ -233,9 +250,17 @@ public:
bool useAA() const override { return UseAA; }
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
- /// that still makes it profitable to inline the call.
- unsigned getMaxInlineSizeThreshold() const { return 64; }
+ bool useSmallAddressing() const {
+ switch (TLInfo.getTargetMachine().getCodeModel()) {
+ case CodeModel::Kernel:
+ // Kernel is currently allowed only for Fuchsia targets,
+ // where it is the same as Small for almost all purposes.
+ case CodeModel::Small:
+ return true;
+ default:
+ return false;
+ }
+ }
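A standalone C++ sketch of the predicate added above, with a mock CodeModel enum standing in for LLVM's (illustration only): only the Small and Kernel code models use the ADRP-based small-addressing patterns.

#include <cstdio>

enum class CodeModel { Small, Kernel, Medium, Large }; // mock, not llvm::CodeModel

static bool useSmallAddressing(CodeModel CM) {
  switch (CM) {
  case CodeModel::Kernel: // treated like Small for almost all purposes
  case CodeModel::Small:
    return true;
  default:
    return false;
  }
}

int main() {
  std::printf("Small: %d, Kernel: %d, Large: %d\n",
              useSmallAddressing(CodeModel::Small),
              useSmallAddressing(CodeModel::Kernel),
              useSmallAddressing(CodeModel::Large)); // 1, 1, 0
  return 0;
}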
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index a3736c0868fb..7c5dcb0853eb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -18,35 +18,37 @@ include "llvm/TableGen/SearchableTable.td"
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
-class AT<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
}
-def : AT<"S1E1R", 0b01, 0b000, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E2R", 0b01, 0b100, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E3R", 0b01, 0b110, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E1W", 0b01, 0b000, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E2W", 0b01, 0b100, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E3W", 0b01, 0b110, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E0R", 0b01, 0b000, 0b0111, 0b1000, 0b010>;
-def : AT<"S1E0W", 0b01, 0b000, 0b0111, 0b1000, 0b011>;
-def : AT<"S12E1R", 0b01, 0b100, 0b0111, 0b1000, 0b100>;
-def : AT<"S12E1W", 0b01, 0b100, 0b0111, 0b1000, 0b101>;
-def : AT<"S12E0R", 0b01, 0b100, 0b0111, 0b1000, 0b110>;
-def : AT<"S12E0W", 0b01, 0b100, 0b0111, 0b1000, 0b111>;
-def : AT<"S1E1RP", 0b01, 0b000, 0b0111, 0b1001, 0b000>;
-def : AT<"S1E1WP", 0b01, 0b000, 0b0111, 0b1001, 0b001>;
-
+def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>;
+def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>;
+def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>;
+def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
+def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
+def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
+def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
+}
//===----------------------------------------------------------------------===//
// DMB/DSB (data barrier) instruction options.
@@ -77,28 +79,31 @@ def : DB<"sy", 0xf>;
// DC (data cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
-class DC<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
}
-def : DC<"ZVA", 0b01, 0b011, 0b0111, 0b0100, 0b001>;
-def : DC<"IVAC", 0b01, 0b000, 0b0111, 0b0110, 0b001>;
-def : DC<"ISW", 0b01, 0b000, 0b0111, 0b0110, 0b010>;
-def : DC<"CVAC", 0b01, 0b011, 0b0111, 0b1010, 0b001>;
-def : DC<"CSW", 0b01, 0b000, 0b0111, 0b1010, 0b010>;
-def : DC<"CVAU", 0b01, 0b011, 0b0111, 0b1011, 0b001>;
-def : DC<"CIVAC", 0b01, 0b011, 0b0111, 0b1110, 0b001>;
-def : DC<"CISW", 0b01, 0b000, 0b0111, 0b1110, 0b010>;
+def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
+def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
+def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
+def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>;
+def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>;
+def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
+def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
+def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
@@ -120,7 +125,7 @@ class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
-def : IC<"IVAU", 0b000, 0b0111, 0b0001, 0b000, 1>;
+def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
//===----------------------------------------------------------------------===//
// ISB (instruction-fetch barrier) instruction options.
@@ -213,14 +218,13 @@ def : PSB<"csync", 0x11>;
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
-class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2, bit needsreg = 1> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
@@ -228,38 +232,38 @@ class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
bit NeedsReg = needsreg;
}
-def : TLBI<"IPAS2E1IS", 0b01, 0b100, 0b1000, 0b0000, 0b001>;
-def : TLBI<"IPAS2LE1IS", 0b01, 0b100, 0b1000, 0b0000, 0b101>;
-def : TLBI<"VMALLE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"VAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b001>;
-def : TLBI<"ASIDE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b010>;
-def : TLBI<"VAAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b011>;
-def : TLBI<"ALLE1IS", 0b01, 0b100, 0b1000, 0b0011, 0b100, 0>;
-def : TLBI<"VALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VMALLS12E1IS", 0b01, 0b100, 0b1000, 0b0011, 0b110, 0>;
-def : TLBI<"VAALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b111>;
-def : TLBI<"IPAS2E1", 0b01, 0b100, 0b1000, 0b0100, 0b001>;
-def : TLBI<"IPAS2LE1", 0b01, 0b100, 0b1000, 0b0100, 0b101>;
-def : TLBI<"VMALLE1", 0b01, 0b000, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE2", 0b01, 0b100, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE3", 0b01, 0b110, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"VAE1", 0b01, 0b000, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE2", 0b01, 0b100, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE3", 0b01, 0b110, 0b1000, 0b0111, 0b001>;
-def : TLBI<"ASIDE1", 0b01, 0b000, 0b1000, 0b0111, 0b010>;
-def : TLBI<"VAAE1", 0b01, 0b000, 0b1000, 0b0111, 0b011>;
-def : TLBI<"ALLE1", 0b01, 0b100, 0b1000, 0b0111, 0b100, 0>;
-def : TLBI<"VALE1", 0b01, 0b000, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE2", 0b01, 0b100, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE3", 0b01, 0b110, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VMALLS12E1", 0b01, 0b100, 0b1000, 0b0111, 0b110, 0>;
-def : TLBI<"VAALE1", 0b01, 0b000, 0b1000, 0b0111, 0b111>;
+def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index d2883941e2c4..dcc51bf02329 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,9 +12,11 @@
#include "AArch64.h"
#include "AArch64CallLowering.h"
-#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64MacroFusion.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "AArch64RegisterBankInfo.h"
+#endif
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
@@ -115,7 +117,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
- cl::init(true));
+ cl::init(false));
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
@@ -136,6 +138,11 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
+static cl::opt<int> EnableGlobalISelAtO(
+ "aarch64-enable-global-isel-at-O", cl::Hidden,
+ cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
+ cl::init(-1));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -278,7 +285,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// FIXME: At this point, we can't rely on Subtarget having RBI.
// It's awkward to mix passing RBI and the Subtarget; should we pass
// TII/TRI as well?
- GISel->InstSelector.reset(new AArch64InstructionSelector(*this, *I, *RBI));
+ GISel->InstSelector.reset(
+ createAArch64InstructionSelector(*this, *I, *RBI));
GISel->RegBankInfo.reset(RBI);
#endif
@@ -323,10 +331,24 @@ public:
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ if (ST.hasFuseLiterals()) {
+ // Run the Macro Fusion after RA again since literals are expanded from
+ // pseudos then (v. addPreSched2()).
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ return DAG;
+ }
+
+ return nullptr;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
@@ -341,6 +363,8 @@ public:
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+
+ bool isGlobalISelEnabled() const override;
};
} // end anonymous namespace
@@ -450,6 +474,10 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
}
#endif
+bool AArch64PassConfig::isGlobalISelEnabled() const {
+ return TM->getOptLevel() <= EnableGlobalISelAtO;
+}
+
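The gate added above compares the codegen optimization level against the new -aarch64-enable-global-isel-at-O value; with the default of -1 the comparison never holds, so GlobalISel stays off unless the flag is raised to 0 or higher. A standalone C++ sketch of that comparison (illustration only, plain ints instead of CodeGenOpt::Level):

#include <cstdio>

static bool isGlobalISelEnabled(int OptLevel, int EnableGlobalISelAtO) {
  return OptLevel <= EnableGlobalISelAtO;
}

int main() {
  std::printf("-O0, default(-1): %d\n", isGlobalISelEnabled(0, -1)); // 0
  std::printf("-O0, flag=0:      %d\n", isGlobalISelEnabled(0, 0));  // 1
  std::printf("-O2, flag=0:      %d\n", isGlobalISelEnabled(2, 0));  // 0
  return 0;
}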
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 6fa5e83957e1..2c75a3258c1c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -21,6 +21,8 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b8833e5a5552..4d59da0c646d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -176,7 +176,8 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -436,7 +437,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+ Type *CondTy, const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
@@ -463,11 +464,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
- unsigned Alignment, unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
@@ -505,12 +507,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one ldN/stN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
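The new return line above charges Factor times the number of ldN/stN instructions needed for the sub-vector type. A standalone C++ sketch of the arithmetic, under the assumption that a legal sub-vector is split into 128-bit chunks (the real legality and count helpers live in AArch64ISelLowering and are not shown in this hunk):

#include <cstdio>

// Assumption for illustration: one ldN/stN per 128 bits of sub-vector.
static unsigned NumInterleavedAccesses(unsigned SubVecBits) {
  return (SubVecBits + 127) / 128;
}

int main() {
  // <16 x i32>, Factor 4 -> sub-vector <4 x i32> = 128 bits -> cost 4 * 1.
  std::printf("cost: %u\n", 4 * NumInterleavedAccesses(4 * 32));
  // <32 x i32>, Factor 4 -> sub-vector <8 x i32> = 256 bits -> cost 4 * 2.
  std::printf("cost: %u\n", 4 * NumInterleavedAccesses(8 * 32));
  return 0;
}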
@@ -594,8 +598,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_ld4:
Info.ReadMem = true;
Info.WriteMem = false;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(0);
break;
case Intrinsic::aarch64_neon_st2:
@@ -603,8 +605,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
break;
}
@@ -628,6 +628,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return true;
}
+/// See if \p I should be considered for address type promotion. We check if \p
+/// I is a sext with the right type and used in memory accesses. If it is used
+/// in a
+/// "complex" getelementptr, we allow it to be promoted without finding other
+/// sext instructions that sign extended the same initial value. A getelementptr
+/// is considered as "complex" if it has more than 2 operands.
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+ bool Considerable = false;
+ AllowPromotionWithoutCommonHeader = false;
+ if (!isa<SExtInst>(&I))
+ return false;
+ Type *ConsideredSExtType =
+ Type::getInt64Ty(I.getParent()->getParent()->getContext());
+ if (I.getType() != ConsideredSExtType)
+ return false;
+ // See if the sext is the one with the right type and used in at least one
+ // GetElementPtrInst.
+ for (const User *U : I.users()) {
+ if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+ Considerable = true;
+ // A getelementptr is considered as "complex" if it has more than 2
+ // operands. We will promote a SExt used in such complex GEP as we
+ // expect some computation to be merged if they are done on 64 bits.
+ if (GEPInst->getNumOperands() > 2) {
+ AllowPromotionWithoutCommonHeader = true;
+ break;
+ }
+ }
+ }
+ return Considerable;
+}
+
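A standalone C++ mock of the heuristic added above (the structs are illustrative stand-ins, not LLVM classes): a sext to i64 that feeds at least one getelementptr is worth considering, and a GEP with more than two operands additionally allows promotion without a common header.

#include <cstdio>
#include <vector>

struct MockGEP { unsigned NumOperands; };                         // stand-in for GetElementPtrInst
struct MockSExt { bool ToInt64; std::vector<MockGEP> GEPUsers; };  // stand-in for SExtInst

static bool shouldConsider(const MockSExt &I, bool &AllowWithoutCommonHeader) {
  AllowWithoutCommonHeader = false;
  if (!I.ToInt64)
    return false;
  bool Considerable = false;
  for (const MockGEP &G : I.GEPUsers) {
    Considerable = true;
    if (G.NumOperands > 2) { // "complex" GEP
      AllowWithoutCommonHeader = true;
      break;
    }
  }
  return Considerable;
}

int main() {
  bool Allow = false;
  MockSExt S{true, {{3}}}; // sext to i64 used by a 3-operand ("complex") GEP
  bool Considerable = shouldConsider(S, Allow);
  std::printf("considerable: %d, allow without common header: %d\n",
              Considerable, Allow); // 1, 1
  return 0;
}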
unsigned AArch64TTIImpl::getCacheLineSize() {
return ST->getCacheLineSize();
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 18287ed6653f..e37c003e064c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
@@ -90,7 +86,8 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
@@ -107,10 +104,11 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -125,6 +123,10 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader);
+
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
index e3b1d7cea48d..f53af2315ec9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
@@ -19,13 +19,27 @@
// is rewritten into
// dup v3.4s, v2.s[1]
// fmla v0.4s, v1.4s, v3.4s
+//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
using namespace llvm;
@@ -41,14 +55,15 @@ namespace {
struct AArch64VectorByElementOpt : public MachineFunctionPass {
static char ID;
- AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
- initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
- }
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
+ AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
+ initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
+ }
+
/// Based only on latency of instructions, determine if it is cost efficient
/// to replace the instruction InstDesc by the two instructions InstDescRep1
/// and InstDescRep2.
@@ -90,8 +105,10 @@ struct AArch64VectorByElementOpt : public MachineFunctionPass {
return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
}
};
+
char AArch64VectorByElementOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt",
AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b86a283b40d4..cbab68979c56 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -74,6 +74,7 @@ private:
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
@@ -537,154 +538,15 @@ public:
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
- bool isImm0_1() const {
+ template <int N, int M>
+ bool isImmInRange() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 2);
- }
-
- bool isImm0_7() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 8);
- }
-
- bool isImm1_8() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 9);
- }
-
- bool isImm0_15() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 16);
- }
-
- bool isImm1_16() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 17);
- }
-
- bool isImm0_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 32);
- }
-
- bool isImm1_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 32);
- }
-
- bool isImm1_32() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 33);
- }
-
- bool isImm0_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 64);
- }
-
- bool isImm1_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 64);
- }
-
- bool isImm1_64() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 65);
- }
-
- bool isImm0_127() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 128);
- }
-
- bool isImm0_255() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 256);
- }
-
- bool isImm0_65535() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 65536);
- }
-
- bool isImm32_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 32 && Val < 64);
+ return (Val >= N && Val <= M);
}
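
The single template replaces the dozen hand-written predicates above; note that the bounds are now inclusive on both ends, so each deleted method maps onto an [N, M] pair. A standalone sketch of the arithmetic (no MC plumbing):

// Sketch of the new predicate's range check; bounds are inclusive, unlike the
// old "Val < Upper" comparisons.
#include <cstdint>

template <int N, int M> constexpr bool isImmInRangeSketch(int64_t Val) {
  return Val >= N && Val <= M;
}
// Old hand-written predicate      Equivalent instantiation
//   isImm0_7()   (Val < 8)    ->  isImmInRangeSketch<0, 7>(Val)
//   isImm1_8()   (Val < 9)    ->  isImmInRangeSketch<1, 8>(Val)
//   isImm32_63()               ->  isImmInRangeSketch<32, 63>(Val)
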
bool isLogicalImm32() const {
@@ -804,31 +666,8 @@ public:
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
- bool isBranchTarget26() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
- }
-
- bool isPCRelLabel19() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
- }
-
- bool isBranchTarget14() const {
+ template<int N>
+ bool isBranchTarget() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -837,7 +676,8 @@ public:
int64_t Val = MCE->getValue();
if (Val & 0x3)
return false;
- return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
+ assert(N > 0 && "Branch target immediate cannot be 0 bits!");
+ return (Val >= -((1<<(N-1)) << 2) && Val <= (((1<<(N-1))-1) << 2));
}
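
The template encodes an N-bit signed word offset scaled by 4 bytes, so the accepted byte range is [-(2^(N-1))*4, (2^(N-1)-1)*4] with the low two bits clear. A small sketch of that range check, with the concrete ranges of the three deleted predicates noted in comments:

// Sketch of the branch-target range the template expresses.
#include <cstdint>

template <int N> constexpr bool inBranchRangeSketch(int64_t Val) {
  return (Val & 0x3) == 0 &&
         Val >= -((INT64_C(1) << (N - 1)) << 2) &&
         Val <= (((INT64_C(1) << (N - 1)) - 1) << 2);
}
// N = 14: -0x8000 .. 0x7ffc        (old isBranchTarget14)
// N = 19: -1 MiB .. 1 MiB - 4      (old isPCRelLabel19)
// N = 26: -128 MiB .. 128 MiB - 4  (old isBranchTarget26)
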
bool
@@ -2494,6 +2334,35 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
+ if (FBS[AArch64::HasV8_1aOps])
+ Str += "ARMv8.1a";
+ else if (FBS[AArch64::HasV8_2aOps])
+ Str += "ARMv8.2a";
+ else
+ Str += "(unknown)";
+}
+
+void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
+ SMLoc S) {
+ const uint16_t Op2 = Encoding & 7;
+ const uint16_t Cm = (Encoding & 0x78) >> 3;
+ const uint16_t Cn = (Encoding & 0x780) >> 7;
+ const uint16_t Op1 = (Encoding & 0x3800) >> 11;
+
+ const MCExpr *Expr = MCConstantExpr::create(Op1, getContext());
+
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext()));
+ Expr = MCConstantExpr::create(Op2, getContext());
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+}
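
createSysAlias() unpacks a 14-bit encoding with op2 in bits [2:0], CRm in [6:3], CRn in [10:7], and op1 in [13:11], matching the masks above. A sketch of the packing direction (helper name is illustrative):

// Sketch of the encoding layout consumed by createSysAlias().
#include <cstdint>

constexpr uint16_t packSysEncodingSketch(uint16_t Op1, uint16_t Cn,
                                         uint16_t Cm, uint16_t Op2) {
  return (Op1 << 11) | (Cn << 7) | (Cm << 3) | Op2;
}
// IC IVAU is "SYS #3, C7, C5, #1":
static_assert(packSysEncodingSketch(3, 7, 5, 1) == 0x1BA9,
              "op1=3, CRn=7, CRm=5, op2=1");
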
+
/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
@@ -2510,228 +2379,48 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
- const MCExpr *Expr = nullptr;
-
-#define SYS_ALIAS(op1, Cn, Cm, op2) \
- do { \
- Expr = MCConstantExpr::create(op1, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
- Expr = MCConstantExpr::create(op2, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (false)
-
if (Mnemonic == "ic") {
- if (!Op.compare_lower("ialluis")) {
- // SYS #0, C7, C1, #0
- SYS_ALIAS(0, 7, 1, 0);
- } else if (!Op.compare_lower("iallu")) {
- // SYS #0, C7, C5, #0
- SYS_ALIAS(0, 7, 5, 0);
- } else if (!Op.compare_lower("ivau")) {
- // SYS #3, C7, C5, #1
- SYS_ALIAS(3, 7, 5, 1);
- } else {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
+ if (!IC)
return TokError("invalid operand for IC instruction");
+ else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("IC " + std::string(IC->Name) + " requires ");
+ setRequiredFeatureString(IC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(IC->Encoding, Operands, S);
} else if (Mnemonic == "dc") {
- if (!Op.compare_lower("zva")) {
- // SYS #3, C7, C4, #1
- SYS_ALIAS(3, 7, 4, 1);
- } else if (!Op.compare_lower("ivac")) {
- // SYS #3, C7, C6, #1
- SYS_ALIAS(0, 7, 6, 1);
- } else if (!Op.compare_lower("isw")) {
- // SYS #0, C7, C6, #2
- SYS_ALIAS(0, 7, 6, 2);
- } else if (!Op.compare_lower("cvac")) {
- // SYS #3, C7, C10, #1
- SYS_ALIAS(3, 7, 10, 1);
- } else if (!Op.compare_lower("csw")) {
- // SYS #0, C7, C10, #2
- SYS_ALIAS(0, 7, 10, 2);
- } else if (!Op.compare_lower("cvau")) {
- // SYS #3, C7, C11, #1
- SYS_ALIAS(3, 7, 11, 1);
- } else if (!Op.compare_lower("civac")) {
- // SYS #3, C7, C14, #1
- SYS_ALIAS(3, 7, 14, 1);
- } else if (!Op.compare_lower("cisw")) {
- // SYS #0, C7, C14, #2
- SYS_ALIAS(0, 7, 14, 2);
- } else if (!Op.compare_lower("cvap")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #3, C7, C12, #1
- SYS_ALIAS(3, 7, 12, 1);
- } else {
- return TokError("DC CVAP requires ARMv8.2a");
- }
- } else {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByName(Op);
+ if (!DC)
return TokError("invalid operand for DC instruction");
+ else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("DC " + std::string(DC->Name) + " requires ");
+ setRequiredFeatureString(DC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(DC->Encoding, Operands, S);
} else if (Mnemonic == "at") {
- if (!Op.compare_lower("s1e1r")) {
- // SYS #0, C7, C8, #0
- SYS_ALIAS(0, 7, 8, 0);
- } else if (!Op.compare_lower("s1e2r")) {
- // SYS #4, C7, C8, #0
- SYS_ALIAS(4, 7, 8, 0);
- } else if (!Op.compare_lower("s1e3r")) {
- // SYS #6, C7, C8, #0
- SYS_ALIAS(6, 7, 8, 0);
- } else if (!Op.compare_lower("s1e1w")) {
- // SYS #0, C7, C8, #1
- SYS_ALIAS(0, 7, 8, 1);
- } else if (!Op.compare_lower("s1e2w")) {
- // SYS #4, C7, C8, #1
- SYS_ALIAS(4, 7, 8, 1);
- } else if (!Op.compare_lower("s1e3w")) {
- // SYS #6, C7, C8, #1
- SYS_ALIAS(6, 7, 8, 1);
- } else if (!Op.compare_lower("s1e0r")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 2);
- } else if (!Op.compare_lower("s1e0w")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 3);
- } else if (!Op.compare_lower("s12e1r")) {
- // SYS #4, C7, C8, #4
- SYS_ALIAS(4, 7, 8, 4);
- } else if (!Op.compare_lower("s12e1w")) {
- // SYS #4, C7, C8, #5
- SYS_ALIAS(4, 7, 8, 5);
- } else if (!Op.compare_lower("s12e0r")) {
- // SYS #4, C7, C8, #6
- SYS_ALIAS(4, 7, 8, 6);
- } else if (!Op.compare_lower("s12e0w")) {
- // SYS #4, C7, C8, #7
- SYS_ALIAS(4, 7, 8, 7);
- } else if (!Op.compare_lower("s1e1rp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #0
- SYS_ALIAS(0, 7, 9, 0);
- } else {
- return TokError("AT S1E1RP requires ARMv8.2a");
- }
- } else if (!Op.compare_lower("s1e1wp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #1
- SYS_ALIAS(0, 7, 9, 1);
- } else {
- return TokError("AT S1E1WP requires ARMv8.2a");
- }
- } else {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByName(Op);
+ if (!AT)
return TokError("invalid operand for AT instruction");
+ else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("AT " + std::string(AT->Name) + " requires ");
+ setRequiredFeatureString(AT->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(AT->Encoding, Operands, S);
} else if (Mnemonic == "tlbi") {
- if (!Op.compare_lower("vmalle1is")) {
- // SYS #0, C8, C3, #0
- SYS_ALIAS(0, 8, 3, 0);
- } else if (!Op.compare_lower("alle2is")) {
- // SYS #4, C8, C3, #0
- SYS_ALIAS(4, 8, 3, 0);
- } else if (!Op.compare_lower("alle3is")) {
- // SYS #6, C8, C3, #0
- SYS_ALIAS(6, 8, 3, 0);
- } else if (!Op.compare_lower("vae1is")) {
- // SYS #0, C8, C3, #1
- SYS_ALIAS(0, 8, 3, 1);
- } else if (!Op.compare_lower("vae2is")) {
- // SYS #4, C8, C3, #1
- SYS_ALIAS(4, 8, 3, 1);
- } else if (!Op.compare_lower("vae3is")) {
- // SYS #6, C8, C3, #1
- SYS_ALIAS(6, 8, 3, 1);
- } else if (!Op.compare_lower("aside1is")) {
- // SYS #0, C8, C3, #2
- SYS_ALIAS(0, 8, 3, 2);
- } else if (!Op.compare_lower("vaae1is")) {
- // SYS #0, C8, C3, #3
- SYS_ALIAS(0, 8, 3, 3);
- } else if (!Op.compare_lower("alle1is")) {
- // SYS #4, C8, C3, #4
- SYS_ALIAS(4, 8, 3, 4);
- } else if (!Op.compare_lower("vale1is")) {
- // SYS #0, C8, C3, #5
- SYS_ALIAS(0, 8, 3, 5);
- } else if (!Op.compare_lower("vaale1is")) {
- // SYS #0, C8, C3, #7
- SYS_ALIAS(0, 8, 3, 7);
- } else if (!Op.compare_lower("vmalle1")) {
- // SYS #0, C8, C7, #0
- SYS_ALIAS(0, 8, 7, 0);
- } else if (!Op.compare_lower("alle2")) {
- // SYS #4, C8, C7, #0
- SYS_ALIAS(4, 8, 7, 0);
- } else if (!Op.compare_lower("vale2is")) {
- // SYS #4, C8, C3, #5
- SYS_ALIAS(4, 8, 3, 5);
- } else if (!Op.compare_lower("vale3is")) {
- // SYS #6, C8, C3, #5
- SYS_ALIAS(6, 8, 3, 5);
- } else if (!Op.compare_lower("alle3")) {
- // SYS #6, C8, C7, #0
- SYS_ALIAS(6, 8, 7, 0);
- } else if (!Op.compare_lower("vae1")) {
- // SYS #0, C8, C7, #1
- SYS_ALIAS(0, 8, 7, 1);
- } else if (!Op.compare_lower("vae2")) {
- // SYS #4, C8, C7, #1
- SYS_ALIAS(4, 8, 7, 1);
- } else if (!Op.compare_lower("vae3")) {
- // SYS #6, C8, C7, #1
- SYS_ALIAS(6, 8, 7, 1);
- } else if (!Op.compare_lower("aside1")) {
- // SYS #0, C8, C7, #2
- SYS_ALIAS(0, 8, 7, 2);
- } else if (!Op.compare_lower("vaae1")) {
- // SYS #0, C8, C7, #3
- SYS_ALIAS(0, 8, 7, 3);
- } else if (!Op.compare_lower("alle1")) {
- // SYS #4, C8, C7, #4
- SYS_ALIAS(4, 8, 7, 4);
- } else if (!Op.compare_lower("vale1")) {
- // SYS #0, C8, C7, #5
- SYS_ALIAS(0, 8, 7, 5);
- } else if (!Op.compare_lower("vale2")) {
- // SYS #4, C8, C7, #5
- SYS_ALIAS(4, 8, 7, 5);
- } else if (!Op.compare_lower("vale3")) {
- // SYS #6, C8, C7, #5
- SYS_ALIAS(6, 8, 7, 5);
- } else if (!Op.compare_lower("vaale1")) {
- // SYS #0, C8, C7, #7
- SYS_ALIAS(0, 8, 7, 7);
- } else if (!Op.compare_lower("ipas2e1")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 4, 1);
- } else if (!Op.compare_lower("ipas2le1")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 4, 5);
- } else if (!Op.compare_lower("ipas2e1is")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 0, 1);
- } else if (!Op.compare_lower("ipas2le1is")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 0, 5);
- } else if (!Op.compare_lower("vmalls12e1")) {
- // SYS #4, C8, C7, #6
- SYS_ALIAS(4, 8, 7, 6);
- } else if (!Op.compare_lower("vmalls12e1is")) {
- // SYS #4, C8, C3, #6
- SYS_ALIAS(4, 8, 3, 6);
- } else {
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByName(Op);
+ if (!TLBI)
return TokError("invalid operand for TLBI instruction");
+ else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("TLBI " + std::string(TLBI->Name) + " requires ");
+ setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(TLBI->Encoding, Operands, S);
}
-#undef SYS_ALIAS
-
Parser.Lex(); // Eat operand.
bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
@@ -2744,12 +2433,10 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister) {
+ if (ExpectRegister && !HasRegister)
return TokError("specified " + Mnemonic + " op requires a register");
- }
- else if (!ExpectRegister && HasRegister) {
+ else if (!ExpectRegister && HasRegister)
return TokError("specified " + Mnemonic + " op does not use a register");
- }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
@@ -2884,7 +2571,6 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
// Try for a vector register.
if (!tryParseVectorRegister(Operands))
@@ -2897,30 +2583,6 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
- // A small number of instructions (FMOVXDhighr, for example) have "[1]"
- // as a string token in the instruction itself.
- SMLoc LBracS = getLoc();
- const AsmToken &Tok = Parser.getTok();
- if (parseOptionalToken(AsmToken::LBrac)) {
- if (Tok.is(AsmToken::Integer)) {
- SMLoc IntS = getLoc();
- int64_t Val = Tok.getIntVal();
- if (Val == 1) {
- Parser.Lex();
- SMLoc RBracS = getLoc();
- if (parseOptionalToken(AsmToken::RBrac)) {
- Operands.push_back(
- AArch64Operand::CreateToken("[", false, LBracS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("1", false, IntS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("]", false, RBracS, getContext()));
- return false;
- }
- }
- }
- }
-
return false;
}
@@ -3696,6 +3358,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "immediate must be an integer in range [0, 63].");
case Match_InvalidImm0_127:
return Error(Loc, "immediate must be an integer in range [0, 127].");
+ case Match_InvalidImm0_255:
+ return Error(Loc, "immediate must be an integer in range [0, 255].");
case Match_InvalidImm0_65535:
return Error(Loc, "immediate must be an integer in range [0, 65535].");
case Match_InvalidImm1_8:
@@ -4120,6 +3784,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidImm0_31:
case Match_InvalidImm0_63:
case Match_InvalidImm0_127:
+ case Match_InvalidImm0_255:
case Match_InvalidImm0_65535:
case Match_InvalidImm1_8:
case Match_InvalidImm1_16:
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index b4f85204714f..41ae70f85e58 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -16,12 +16,20 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -451,8 +459,8 @@ static const LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::LD3i64, "ld3", ".d", 1, true, 0 },
{ AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 },
{ AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 },
- { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
- { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
+ { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
+ { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
{ AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 },
{ AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 },
{ AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 },
@@ -731,7 +739,6 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
#endif
- const char *Asm = nullptr;
const MCOperand &Op1 = MI->getOperand(0);
const MCOperand &Cn = MI->getOperand(1);
const MCOperand &Cm = MI->getOperand(2);
@@ -742,230 +749,74 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
unsigned CmVal = Cm.getImm();
unsigned Op2Val = Op2.getImm();
+ uint16_t Encoding = Op2Val;
+ Encoding |= CmVal << 3;
+ Encoding |= CnVal << 7;
+ Encoding |= Op1Val << 11;
+
+ bool NeedsReg;
+ std::string Ins;
+ std::string Name;
+
if (CnVal == 7) {
switch (CmVal) {
- default:
- break;
-
+ default: return false;
// IC aliases
- case 1:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tialluis";
- break;
- case 5:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tiallu";
- else if (Op1Val == 3 && Op2Val == 1)
- Asm = "ic\tivau";
- break;
-
+ case 1: case 5: {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
+ if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = IC->NeedsReg;
+ Ins = "ic\t";
+ Name = std::string(IC->Name);
+ }
+ break;
// DC aliases
- case 4:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tzva";
- break;
- case 6:
- if (Op1Val == 0 && Op2Val == 1)
- Asm = "dc\tivac";
- if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tisw";
- break;
- case 10:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcsw";
- break;
- case 11:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvau";
- break;
- case 12:
- if (Op1Val == 3 && Op2Val == 1 &&
- (STI.getFeatureBits()[AArch64::HasV8_2aOps]))
- Asm = "dc\tcvap";
- break;
- case 14:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcivac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcisw";
- break;
-
+ case 4: case 6: case 10: case 11: case 12: case 14:
+ {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
+ if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "dc\t";
+ Name = std::string(DC->Name);
+ }
+ break;
// AT aliases
- case 8:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1r"; break;
- case 1: Asm = "at\ts1e1w"; break;
- case 2: Asm = "at\ts1e0r"; break;
- case 3: Asm = "at\ts1e0w"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e2r"; break;
- case 1: Asm = "at\ts1e2w"; break;
- case 4: Asm = "at\ts12e1r"; break;
- case 5: Asm = "at\ts12e1w"; break;
- case 6: Asm = "at\ts12e0r"; break;
- case 7: Asm = "at\ts12e0w"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e3r"; break;
- case 1: Asm = "at\ts1e3w"; break;
- }
- break;
- }
- break;
- case 9:
- switch (Op1Val) {
- default:
- break;
- case 0:
- if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1rp"; break;
- case 1: Asm = "at\ts1e1wp"; break;
- }
- }
- break;
- }
+ case 8: case 9: {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
+ if (!AT || !AT->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "at\t";
+ Name = std::string(AT->Name);
+ }
+ break;
}
} else if (CnVal == 8) {
// TLBI aliases
- switch (CmVal) {
- default:
- break;
- case 3:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1is"; break;
- case 1: Asm = "tlbi\tvae1is"; break;
- case 2: Asm = "tlbi\taside1is"; break;
- case 3: Asm = "tlbi\tvaae1is"; break;
- case 5: Asm = "tlbi\tvale1is"; break;
- case 7: Asm = "tlbi\tvaale1is"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2is"; break;
- case 1: Asm = "tlbi\tvae2is"; break;
- case 4: Asm = "tlbi\talle1is"; break;
- case 5: Asm = "tlbi\tvale2is"; break;
- case 6: Asm = "tlbi\tvmalls12e1is"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3is"; break;
- case 1: Asm = "tlbi\tvae3is"; break;
- case 5: Asm = "tlbi\tvale3is"; break;
- }
- break;
- }
- break;
- case 0:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1is"; break;
- case 5: Asm = "tlbi\tipas2le1is"; break;
- }
- break;
- }
- break;
- case 4:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1"; break;
- case 5: Asm = "tlbi\tipas2le1"; break;
- }
- break;
- }
- break;
- case 7:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1"; break;
- case 1: Asm = "tlbi\tvae1"; break;
- case 2: Asm = "tlbi\taside1"; break;
- case 3: Asm = "tlbi\tvaae1"; break;
- case 5: Asm = "tlbi\tvale1"; break;
- case 7: Asm = "tlbi\tvaale1"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2"; break;
- case 1: Asm = "tlbi\tvae2"; break;
- case 4: Asm = "tlbi\talle1"; break;
- case 5: Asm = "tlbi\tvale2"; break;
- case 6: Asm = "tlbi\tvmalls12e1"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3"; break;
- case 1: Asm = "tlbi\tvae3"; break;
- case 5: Asm = "tlbi\tvale3"; break;
- }
- break;
- }
- break;
- }
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
+ if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = TLBI->NeedsReg;
+ Ins = "tlbi\t";
+ Name = std::string(TLBI->Name);
}
+ else
+ return false;
- if (Asm) {
- unsigned Reg = MI->getOperand(4).getReg();
+ std::string Str = Ins + Name;
+ std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
- O << '\t' << Asm;
- if (StringRef(Asm).lower().find("all") == StringRef::npos)
- O << ", " << getRegisterName(Reg);
- }
+ O << '\t' << Str;
+ if (NeedsReg)
+ O << ", " << getRegisterName(MI->getOperand(4).getReg());
- return Asm != nullptr;
+ return true;
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 65dca99ed04e..a45258cb97b7 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -37,9 +38,11 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
+
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
@@ -177,12 +180,15 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O) override;
+
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 14c0327f5fa8..ebf05ae303dd 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -73,7 +73,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -138,15 +138,15 @@ static unsigned AdrImmBits(unsigned Value) {
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx) {
+ MCContext &Ctx) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- if (Ctx && (SignedValue > 2097151 || SignedValue < -2097152))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 2097151 || SignedValue < -2097152)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
@@ -154,66 +154,65 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case AArch64::fixup_aarch64_pcrel_branch19:
// Signed 21-bit immediate
if (SignedValue > 2097151 || SignedValue < -2097152)
- if (Ctx) Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
// Low two bits are not encoded.
return (Value >> 2) & 0x7ffff;
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
// Unsigned 12-bit immediate
- if (Ctx && Value >= 0x1000)
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value >= 0x1000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return Value;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
// Unsigned 12-bit immediate which gets multiplied by 2
- if (Ctx && (Value >= 0x2000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x1))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
+ if (Value >= 0x2000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
return Value >> 1;
case AArch64::fixup_aarch64_ldst_imm12_scale4:
// Unsigned 12-bit immediate which gets multiplied by 4
- if (Ctx && (Value >= 0x4000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
+ if (Value >= 0x4000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
return Value >> 2;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
// Unsigned 12-bit immediate which gets multiplied by 8
- if (Ctx && (Value >= 0x8000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x7))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
+ if (Value >= 0x8000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x7)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
return Value >> 3;
case AArch64::fixup_aarch64_ldst_imm12_scale16:
// Unsigned 12-bit immediate which gets multiplied by 16
- if (Ctx && (Value >= 0x10000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0xf))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
+ if (Value >= 0x10000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0xf)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
case AArch64::fixup_aarch64_movw:
- if (Ctx)
- Ctx->reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ Ctx.reportError(Fixup.getLoc(),
+ "no resolvable MOVZ/MOVK fixups supported yet");
return Value;
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
- if (Ctx && (SignedValue > 32767 || SignedValue < -32768))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 32767 || SignedValue < -32768)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3fff;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
// Signed 28-bit immediate
- if (Ctx && (SignedValue > 134217727 || SignedValue < -134217728))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 134217727 || SignedValue < -134217728)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
case FK_Data_1:
case FK_Data_2:
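
For the scaled load/store immediate fixups handled above, the pattern is the same at every scale: with a scale of 2^S bytes, the value must be S-bit aligned and below 0x1000 << S, and the encoded imm12 field is the value with the scale shifted out. A hedged standalone sketch (names are illustrative, not the backend API):

#include <cstdint>

struct ScaledFixupSketch { bool OutOfRange; bool Misaligned; uint64_t Encoded; };

static ScaledFixupSketch adjustScaledLdStSketch(uint64_t Value, unsigned S) {
  ScaledFixupSketch R;
  R.OutOfRange = Value >= (UINT64_C(0x1000) << S);
  R.Misaligned = (Value & ((UINT64_C(1) << S) - 1)) != 0;
  R.Encoded = Value >> S; // what lands in the imm12 field
  return R;
}
// e.g. scale16 (S = 4): 0x10000 is out of range, 0x18 is misaligned,
// and 0xFFF0 encodes as 0xFFF.
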
@@ -264,13 +263,13 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, nullptr);
+ Value = adjustFixupValue(Fixup, Value, Ctx);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -521,17 +520,6 @@ public:
return CompactUnwindEncoding;
}
-
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
- }
};
} // end anonymous namespace
@@ -575,12 +563,6 @@ void ELFAArch64AsmBackend::processFixupValue(
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 685907a2178e..271263507ae1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,27 +14,23 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -106,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
@@ -180,6 +176,7 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
};
+
} // end anonymous namespace
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
@@ -191,6 +188,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -214,4 +212,5 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new AArch64TargetELFStreamer(S);
return nullptr;
}
-}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index e9d38d3dcf10..f710065d9bc7 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -84,9 +84,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
// no matter how far away they are.
else if (CM == CodeModel::JITDefault)
CM = CodeModel::Large;
- else if (CM != CodeModel::Small && CM != CodeModel::Large)
- report_fatal_error(
- "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Small && CM != CodeModel::Large) {
+ if (!TT.isOSFuchsia())
+ report_fatal_error(
+ "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Kernel)
+ report_fatal_error(
+ "Only small, kernel, and large code models are allowed on AArch64");
+ }
}
static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 53a68527ee8e..3d296ba4806b 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -16,14 +16,22 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
namespace {
+
class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
const MCSymbolRefExpr *Sym,
@@ -38,7 +46,8 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-}
+
+} // end anonymous namespace
bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
@@ -51,18 +60,18 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return false;
case FK_Data_1:
- Log2Size = llvm::Log2_32(1);
+ Log2Size = Log2_32(1);
return true;
case FK_Data_2:
- Log2Size = llvm::Log2_32(2);
+ Log2Size = Log2_32(2);
return true;
case FK_Data_4:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
case FK_Data_8:
- Log2Size = llvm::Log2_32(8);
+ Log2Size = Log2_32(8);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
@@ -72,7 +81,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
case AArch64::fixup_aarch64_ldst_imm12_scale4:
case AArch64::fixup_aarch64_ldst_imm12_scale8:
case AArch64::fixup_aarch64_ldst_imm12_scale16:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
switch (Sym->getKind()) {
default:
return false;
@@ -87,14 +96,13 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
}
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
- default: {
+ default:
Asm.getContext().reportError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
return false;
- }
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
@@ -108,7 +116,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index dcc39176031c..5d76681cd97b 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -266,82 +266,86 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
}
} // end namespace AArch64CC
+struct SysAlias {
+ const char *Name;
+ uint16_t Encoding;
+ FeatureBitset FeaturesRequired;
+
+ SysAlias (const char *N, uint16_t E) : Name(N), Encoding(E) {};
+ SysAlias (const char *N, uint16_t E, FeatureBitset F) :
+ Name(N), Encoding(E), FeaturesRequired(F) {};
+
+ bool haveFeatures(FeatureBitset ActiveFeatures) const {
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+
+ FeatureBitset getRequiredFeatures() const { return FeaturesRequired; }
+};
+
+struct SysAliasReg : SysAlias {
+ bool NeedsReg;
+ SysAliasReg(const char *N, uint16_t E, bool R) : SysAlias(N, E), NeedsReg(R) {};
+};
+
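
The feature check in the new base struct is a subset test: an alias is usable when every feature bit it requires is also present in the subtarget's active set, and the two-argument constructor (empty required set) therefore matches every subtarget. A minimal sketch of that semantics, using std::bitset as a stand-in for LLVM's FeatureBitset (an assumption for illustration only):

#include <bitset>

using FeatureBitsSketch = std::bitset<64>;

static bool haveFeaturesSketch(const FeatureBitsSketch &Required,
                               const FeatureBitsSketch &Active) {
  // Every required bit must also be set in the active feature set.
  return (Required & Active) == Required;
}
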
namespace AArch64AT{
- struct AT {
- const char *Name;
- uint16_t Encoding;
+ struct AT : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_AT_DECL
#include "AArch64GenSystemOperands.inc"
-
}
+
namespace AArch64DB {
- struct DB {
- const char *Name;
- uint16_t Encoding;
+ struct DB : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DC {
- struct DC {
- const char *Name;
- uint16_t Encoding;
+ struct DC : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64IC {
- struct IC {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct IC : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_IC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64ISB {
- struct ISB {
- const char *Name;
- uint16_t Encoding;
+ struct ISB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_ISB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PRFM {
- struct PRFM {
- const char *Name;
- uint16_t Encoding;
+ struct PRFM : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PRFM_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PState {
- struct PState {
- const char *Name;
- uint16_t Encoding;
- FeatureBitset FeaturesRequired;
-
- bool haveFeatures(FeatureBitset ActiveFeatures) const {
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
- }
+ struct PState : SysAlias{
+ using SysAlias::SysAlias;
};
#define GET_PSTATE_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PSBHint {
- struct PSB {
- const char *Name;
- uint16_t Encoding;
+ struct PSB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PSB_DECL
#include "AArch64GenSystemOperands.inc"
@@ -451,10 +455,8 @@ namespace AArch64SysReg {
}
namespace AArch64TLBI {
- struct TLBI {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct TLBI : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_TLBI_DECL
#include "AArch64GenSystemOperands.inc"