aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2020-06-20 18:49:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2020-06-20 18:49:12 +0000
commit8055b7e383f74dbc58c8085a0f0c45f4c61f8231 (patch)
treec472e0a73f8ac1437fe3d5f9202fa1590708867d
parentec2b0f99f245da9ce98e41cf4cc2b6b2a02726f6 (diff)
Vendor import of llvm-project branch release/10.xvendor/llvm-project/llvmorg-10.0.0-97-g6f71678ecd2
llvmorg-10.0.0-97-g6f71678ecd2.
Notes
Notes: svn path=/vendor/llvm-project/release-10.x/; revision=362442 svn path=/vendor/llvm-project/llvmorg-10.0.0-97-g6f71678ecd2/; revision=362443; tag=vendor/llvm-project/llvmorg-10.0.0-97-g6f71678ecd2
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp8
-rw-r--r--clang/lib/Tooling/Syntax/Tokens.cpp320
-rw-r--r--clang/utils/TableGen/ClangAttrEmitter.cpp3
-rw-r--r--lld/COFF/Chunks.h10
-rw-r--r--lld/COFF/DLL.cpp16
-rw-r--r--lld/ELF/ScriptLexer.cpp6
-rw-r--r--lld/ELF/ScriptParser.cpp1
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.def4
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp18
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td26
-rw-r--r--llvm/lib/Target/AArch64/AArch64BranchTargets.cpp12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA53.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA57.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedCyclone.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM3.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM4.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM5.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedFalkor.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedKryo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td1997
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h3
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp46
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp21
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp94
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp3
29 files changed, 2396 insertions, 239 deletions
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index c38c724ed9b0..264c903209af 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -3817,6 +3817,9 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK,
SourceLocation LAngleLoc,
ASTTemplateArgsPtr TemplateArgsIn,
SourceLocation RAngleLoc) {
+ if (SS.isInvalid())
+ return TypeResult(true);
+
TemplateName Template = TemplateD.get();
// Translate the parser's template argument list in our AST format.
@@ -5925,7 +5928,9 @@ bool UnnamedLocalNoLinkageFinder::VisitDependentNameType(
bool UnnamedLocalNoLinkageFinder::VisitDependentTemplateSpecializationType(
const DependentTemplateSpecializationType* T) {
- return VisitNestedNameSpecifier(T->getQualifier());
+ if (auto *Q = T->getQualifier())
+ return VisitNestedNameSpecifier(Q);
+ return false;
}
bool UnnamedLocalNoLinkageFinder::VisitPackExpansionType(
@@ -5979,6 +5984,7 @@ bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) {
bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier(
NestedNameSpecifier *NNS) {
+ assert(NNS);
if (NNS->getPrefix() && VisitNestedNameSpecifier(NNS->getPrefix()))
return true;
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 3df1c064923a..35a35f904069 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -335,14 +335,38 @@ public:
SourceRange Range, const MacroArgs *Args) override {
if (!Collector)
return;
- // Only record top-level expansions, not those where:
+ const auto &SM = Collector->PP.getSourceManager();
+ // Only record top-level expansions that directly produce expanded tokens.
+ // This excludes those where:
// - the macro use is inside a macro body,
// - the macro appears in an argument to another macro.
- if (!MacroNameTok.getLocation().isFileID() ||
- (LastExpansionEnd.isValid() &&
- Collector->PP.getSourceManager().isBeforeInTranslationUnit(
- Range.getBegin(), LastExpansionEnd)))
+ // However macro expansion isn't really a tree, it's token rewrite rules,
+ // so there are other cases, e.g.
+ // #define B(X) X
+ // #define A 1 + B
+ // A(2)
+ // Both A and B produce expanded tokens, though the macro name 'B' comes
+ // from an expansion. The best we can do is merge the mappings for both.
+
+ // The *last* token of any top-level macro expansion must be in a file.
+ // (In the example above, see the closing paren of the expansion of B).
+ if (!Range.getEnd().isFileID())
return;
+ // If there's a current expansion that encloses this one, this one can't be
+ // top-level.
+ if (LastExpansionEnd.isValid() &&
+ !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
+ return;
+
+ // If the macro invocation (B) starts in a macro (A) but ends in a file,
+ // we'll create a merged mapping for A + B by overwriting the endpoint for
+ // A's startpoint.
+ if (!Range.getBegin().isFileID()) {
+ Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
+ assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
+ "Overlapping macros should have same expansion location");
+ }
+
Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
LastExpansionEnd = Range.getEnd();
}
@@ -399,197 +423,167 @@ public:
}
TokenBuffer build() && {
- buildSpelledTokens();
-
- // Walk over expanded tokens and spelled tokens in parallel, building the
- // mappings between those using source locations.
- // To correctly recover empty macro expansions, we also take locations
- // reported to PPCallbacks::MacroExpands into account as we do not have any
- // expanded tokens with source locations to guide us.
-
- // The 'eof' token is special, it is not part of spelled token stream. We
- // handle it separately at the end.
assert(!Result.ExpandedTokens.empty());
assert(Result.ExpandedTokens.back().kind() == tok::eof);
- for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
- // (!) I might be updated by the following call.
- processExpandedToken(I);
- }
- // 'eof' not handled in the loop, do it here.
- assert(SM.getMainFileID() ==
- SM.getFileID(Result.ExpandedTokens.back().location()));
- fillGapUntil(Result.Files[SM.getMainFileID()],
- Result.ExpandedTokens.back().location(),
- Result.ExpandedTokens.size() - 1);
- Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
+ // Tokenize every file that contributed tokens to the expanded stream.
+ buildSpelledTokens();
- // Some files might have unaccounted spelled tokens at the end, add an empty
- // mapping for those as they did not have expanded counterparts.
- fillGapsAtEndOfFiles();
+ // The expanded token stream consists of runs of tokens that came from
+ // the same source (a macro expansion, part of a file etc).
+ // Between these runs are the logical positions of spelled tokens that
+ // didn't expand to anything.
+ while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
+ // Create empty mappings for spelled tokens that expanded to nothing here.
+ // May advance NextSpelled, but NextExpanded is unchanged.
+ discard();
+ // Create mapping for a contiguous run of expanded tokens.
+ // Advances NextExpanded past the run, and NextSpelled accordingly.
+ unsigned OldPosition = NextExpanded;
+ advance();
+ if (NextExpanded == OldPosition)
+ diagnoseAdvanceFailure();
+ }
+ // If any tokens remain in any of the files, they didn't expand to anything.
+ // Create empty mappings up until the end of the file.
+ for (const auto &File : Result.Files)
+ discard(File.first);
return std::move(Result);
}
private:
- /// Process the next token in an expanded stream and move corresponding
- /// spelled tokens, record any mapping if needed.
- /// (!) \p I will be updated if this had to skip tokens, e.g. for macros.
- void processExpandedToken(unsigned &I) {
- auto L = Result.ExpandedTokens[I].location();
- if (L.isMacroID()) {
- processMacroExpansion(SM.getExpansionRange(L), I);
- return;
+ // Consume a sequence of spelled tokens that didn't expand to anything.
+ // In the simplest case, skips spelled tokens until finding one that produced
+ // the NextExpanded token, and creates an empty mapping for them.
+ // If Drain is provided, skips remaining tokens from that file instead.
+ void discard(llvm::Optional<FileID> Drain = llvm::None) {
+ SourceLocation Target =
+ Drain ? SM.getLocForEndOfFile(*Drain)
+ : SM.getExpansionLoc(
+ Result.ExpandedTokens[NextExpanded].location());
+ FileID File = SM.getFileID(Target);
+ const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+ auto &NextSpelled = this->NextSpelled[File];
+
+ TokenBuffer::Mapping Mapping;
+ Mapping.BeginSpelled = NextSpelled;
+ // When dropping trailing tokens from a file, the empty mapping should
+ // be positioned within the file's expanded-token range (at the end).
+ Mapping.BeginExpanded = Mapping.EndExpanded =
+ Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
+ // We may want to split into several adjacent empty mappings.
+ // FlushMapping() emits the current mapping and starts a new one.
+ auto FlushMapping = [&, this] {
+ Mapping.EndSpelled = NextSpelled;
+ if (Mapping.BeginSpelled != Mapping.EndSpelled)
+ Result.Files[File].Mappings.push_back(Mapping);
+ Mapping.BeginSpelled = NextSpelled;
+ };
+
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() < Target) {
+ // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
+ // then we want to partition our (empty) mapping.
+ // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
+ SourceLocation KnownEnd = CollectedExpansions.lookup(
+ SpelledTokens[NextSpelled].location().getRawEncoding());
+ if (KnownEnd.isValid()) {
+ FlushMapping(); // Emits [Start, NextSpelled)
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() <= KnownEnd)
+ ++NextSpelled;
+ FlushMapping(); // Emits [NextSpelled, KnownEnd]
+ // Now the loop contitues and will emit (KnownEnd, Target).
+ } else {
+ ++NextSpelled;
+ }
}
- if (L.isFileID()) {
- auto FID = SM.getFileID(L);
- TokenBuffer::MarkedFile &File = Result.Files[FID];
-
- fillGapUntil(File, L, I);
+ FlushMapping();
+ }
- // Skip the token.
- assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
- "no corresponding token in the spelled stream");
- ++NextSpelled[FID];
- return;
+ // Consumes the NextExpanded token and others that are part of the same run.
+ // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
+ // (unless this is a run of file tokens, which we represent with no mapping).
+ void advance() {
+ const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
+ SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
+ FileID File = SM.getFileID(Expansion);
+ const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+ auto &NextSpelled = this->NextSpelled[File];
+
+ if (Tok.location().isFileID()) {
+ // A run of file tokens continues while the expanded/spelled tokens match.
+ while (NextSpelled < SpelledTokens.size() &&
+ NextExpanded < Result.ExpandedTokens.size() &&
+ SpelledTokens[NextSpelled].location() ==
+ Result.ExpandedTokens[NextExpanded].location()) {
+ ++NextSpelled;
+ ++NextExpanded;
+ }
+ // We need no mapping for file tokens copied to the expanded stream.
+ } else {
+ // We found a new macro expansion. We should have its spelling bounds.
+ auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
+ assert(End.isValid() && "Macro expansion wasn't captured?");
+
+ // Mapping starts here...
+ TokenBuffer::Mapping Mapping;
+ Mapping.BeginExpanded = NextExpanded;
+ Mapping.BeginSpelled = NextSpelled;
+ // ... consumes spelled tokens within bounds we captured ...
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() <= End)
+ ++NextSpelled;
+ // ... consumes expanded tokens rooted at the same expansion ...
+ while (NextExpanded < Result.ExpandedTokens.size() &&
+ SM.getExpansionLoc(
+ Result.ExpandedTokens[NextExpanded].location()) == Expansion)
+ ++NextExpanded;
+ // ... and ends here.
+ Mapping.EndExpanded = NextExpanded;
+ Mapping.EndSpelled = NextSpelled;
+ Result.Files[File].Mappings.push_back(Mapping);
}
}
- /// Skipped expanded and spelled tokens of a macro expansion that covers \p
- /// SpelledRange. Add a corresponding mapping.
- /// (!) \p I will be the index of the last token in an expansion after this
- /// function returns.
- void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) {
- auto FID = SM.getFileID(SpelledRange.getBegin());
- assert(FID == SM.getFileID(SpelledRange.getEnd()));
- TokenBuffer::MarkedFile &File = Result.Files[FID];
-
- fillGapUntil(File, SpelledRange.getBegin(), I);
-
- // Skip all expanded tokens from the same macro expansion.
- unsigned BeginExpanded = I;
- for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
- auto NextL = Result.ExpandedTokens[I + 1].location();
- if (!NextL.isMacroID() ||
- SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
- break;
+ // advance() is supposed to consume at least one token - if not, we crash.
+ void diagnoseAdvanceFailure() {
+#ifndef NDEBUG
+ // Show the failed-to-map token in context.
+ for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
+ I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
+ const char *L =
+ (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " ";
+ llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
}
- unsigned EndExpanded = I + 1;
- consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
- EndExpanded, NextSpelled[FID]);
+#endif
+ llvm_unreachable("Couldn't map expanded token to spelled tokens!");
}
/// Initializes TokenBuffer::Files and fills spelled tokens and expanded
/// ranges for each of the files.
void buildSpelledTokens() {
for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
- auto FID =
- SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location()));
+ const auto &Tok = Result.ExpandedTokens[I];
+ auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
auto It = Result.Files.try_emplace(FID);
TokenBuffer::MarkedFile &File = It.first->second;
- File.EndExpanded = I + 1;
+ // The eof token should not be considered part of the main-file's range.
+ File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
+
if (!It.second)
continue; // we have seen this file before.
-
// This is the first time we see this file.
File.BeginExpanded = I;
File.SpelledTokens = tokenize(FID, SM, LangOpts);
}
}
- void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
- unsigned ExpandedIndex, unsigned &SpelledIndex) {
- consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
- }
-
- /// Consumes spelled tokens that form a macro expansion and adds a entry to
- /// the resulting token buffer.
- /// (!) SpelledIndex is updated in-place.
- void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
- unsigned BeginExpanded, unsigned EndExpanded,
- unsigned &SpelledIndex) {
- // We need to record this mapping before continuing.
- unsigned MappingBegin = SpelledIndex;
- ++SpelledIndex;
-
- bool HitMapping =
- tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
- (void)HitMapping;
- assert(!HitMapping && "recursive macro expansion?");
-
- TokenBuffer::Mapping M;
- M.BeginExpanded = BeginExpanded;
- M.EndExpanded = EndExpanded;
- M.BeginSpelled = MappingBegin;
- M.EndSpelled = SpelledIndex;
-
- File.Mappings.push_back(M);
- }
-
- /// Consumes spelled tokens until location \p L is reached and adds a mapping
- /// covering the consumed tokens. The mapping will point to an empty expanded
- /// range at position \p ExpandedIndex.
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
- unsigned ExpandedIndex) {
- assert(L.isFileID());
- FileID FID;
- unsigned Offset;
- std::tie(FID, Offset) = SM.getDecomposedLoc(L);
-
- unsigned &SpelledIndex = NextSpelled[FID];
- unsigned MappingBegin = SpelledIndex;
- while (true) {
- auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
- if (SpelledIndex != MappingBegin) {
- TokenBuffer::Mapping M;
- M.BeginSpelled = MappingBegin;
- M.EndSpelled = SpelledIndex;
- M.BeginExpanded = M.EndExpanded = ExpandedIndex;
- File.Mappings.push_back(M);
- }
- if (!EndLoc)
- break;
- consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
- SpelledIndex);
-
- MappingBegin = SpelledIndex;
- }
- };
-
- /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
- /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
- /// (!) NextSpelled is updated in place.
- ///
- /// returns None if \p Offset was reached, otherwise returns the end location
- /// of a mapping that starts at \p NextSpelled.
- llvm::Optional<SourceLocation>
- tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
- unsigned &NextSpelled) {
- for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
- auto L = File.SpelledTokens[NextSpelled].location();
- if (Offset <= SM.getFileOffset(L))
- return llvm::None; // reached the offset we are looking for.
- auto Mapping = CollectedExpansions.find(L.getRawEncoding());
- if (Mapping != CollectedExpansions.end())
- return Mapping->second; // found a mapping before the offset.
- }
- return llvm::None; // no more tokens, we "reached" the offset.
- }
-
- /// Adds empty mappings for unconsumed spelled tokens at the end of each file.
- void fillGapsAtEndOfFiles() {
- for (auto &F : Result.Files) {
- if (F.second.SpelledTokens.empty())
- continue;
- fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
- F.second.EndExpanded);
- }
- }
-
TokenBuffer Result;
- /// For each file, a position of the next spelled token we will consume.
- llvm::DenseMap<FileID, unsigned> NextSpelled;
+ unsigned NextExpanded = 0; // cursor in ExpandedTokens
+ llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
PPExpansions CollectedExpansions;
const SourceManager &SM;
const LangOptions &LangOpts;
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 4c3742c8e339..2fce9d428137 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -2825,6 +2825,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) {
if (R.isSubClassOf(InhClass))
OS << " bool isInherited = Record.readInt();\n";
OS << " bool isImplicit = Record.readInt();\n";
+ OS << " bool isPackExpansion = Record.readInt();\n";
ArgRecords = R.getValueAsListOfDefs("Args");
Args.clear();
for (const auto *Arg : ArgRecords) {
@@ -2840,6 +2841,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) {
if (R.isSubClassOf(InhClass))
OS << " cast<InheritableAttr>(New)->setInherited(isInherited);\n";
OS << " New->setImplicit(isImplicit);\n";
+ OS << " New->setPackExpansion(isPackExpansion);\n";
OS << " break;\n";
OS << " }\n";
}
@@ -2866,6 +2868,7 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) {
if (R.isSubClassOf(InhClass))
OS << " Record.push_back(SA->isInherited());\n";
OS << " Record.push_back(A->isImplicit());\n";
+ OS << " Record.push_back(A->isPackExpansion());\n";
for (const auto *Arg : Args)
createArgument(*Arg, R.getName())->writePCHWrite(OS);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 7ae4ee735f4a..2be2a72c4a1e 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -486,7 +486,9 @@ public:
class ImportThunkChunkARM : public ImportThunkChunk {
public:
- explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {}
+ explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {
+ setAlignment(2);
+ }
size_t getSize() const override { return sizeof(importThunkARM); }
void getBaserels(std::vector<Baserel> *res) override;
void writeTo(uint8_t *buf) const override;
@@ -494,14 +496,16 @@ public:
class ImportThunkChunkARM64 : public ImportThunkChunk {
public:
- explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {}
+ explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {
+ setAlignment(4);
+ }
size_t getSize() const override { return sizeof(importThunkARM64); }
void writeTo(uint8_t *buf) const override;
};
class RangeExtensionThunkARM : public NonSectionChunk {
public:
- explicit RangeExtensionThunkARM(Defined *t) : target(t) {}
+ explicit RangeExtensionThunkARM(Defined *t) : target(t) { setAlignment(2); }
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 39d9fbab63d5..50301ad91b1d 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -365,7 +365,9 @@ public:
class ThunkChunkARM : public NonSectionChunk {
public:
- ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}
+ ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {
+ setAlignment(2);
+ }
size_t getSize() const override { return sizeof(thunkARM); }
@@ -385,7 +387,9 @@ public:
class TailMergeChunkARM : public NonSectionChunk {
public:
- TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) {}
+ TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) {
+ setAlignment(2);
+ }
size_t getSize() const override { return sizeof(tailMergeARM); }
@@ -405,7 +409,9 @@ public:
class ThunkChunkARM64 : public NonSectionChunk {
public:
- ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}
+ ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {
+ setAlignment(4);
+ }
size_t getSize() const override { return sizeof(thunkARM64); }
@@ -422,7 +428,9 @@ public:
class TailMergeChunkARM64 : public NonSectionChunk {
public:
- TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) {}
+ TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) {
+ setAlignment(4);
+ }
size_t getSize() const override { return sizeof(tailMergeARM64); }
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index e0ff56fec3f3..1fed3d06227e 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -52,6 +52,8 @@ StringRef ScriptLexer::getLine() {
// Returns 1-based line number of the current token.
size_t ScriptLexer::getLineNumber() {
+ if (pos == 0)
+ return 1;
StringRef s = getCurrentMB().getBuffer();
StringRef tok = tokens[pos - 1];
return s.substr(0, tok.data() - s.data()).count('\n') + 1;
@@ -292,7 +294,9 @@ static bool encloses(StringRef s, StringRef t) {
MemoryBufferRef ScriptLexer::getCurrentMB() {
// Find input buffer containing the current token.
- assert(!mbs.empty() && pos > 0);
+ assert(!mbs.empty());
+ if (pos == 0)
+ return mbs.back();
for (MemoryBufferRef mb : mbs)
if (encloses(mb.getBuffer(), tokens[pos - 1]))
return mb;
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index fd8de3b54bd7..80ec8b655b04 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -737,6 +737,7 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri
expect("(");
if (consume("NOLOAD")) {
cmd->noload = true;
+ cmd->type = SHT_NOBITS;
} else {
skip(); // This is "COPY", "INFO" or "OVERLAY".
cmd->nonAlloc = true;
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index 6b25ef2ca435..050059e36a25 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -152,6 +152,10 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
+AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD |
+ AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE |
+ AArch64::AEK_RAS))
AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 4b9c50aeb1d3..35964b2cdbda 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -963,10 +963,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
}
- // If one of the blocks is the entire common tail (and not the entry
- // block, which we can't jump to), we can treat all blocks with this same
- // tail at once. Use PredBB if that is one of the possibilities, as that
- // will not introduce any extra branches.
+ // If one of the blocks is the entire common tail (and is not the entry
+ // block/an EH pad, which we can't jump to), we can treat all blocks with
+ // this same tail at once. Use PredBB if that is one of the possibilities,
+ // as that will not introduce any extra branches.
MachineBasicBlock *EntryBB =
&MergePotentials.front().getBlock()->getParent()->front();
unsigned commonTailIndex = SameTails.size();
@@ -974,19 +974,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// into the other.
if (SameTails.size() == 2 &&
SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
- SameTails[1].tailIsWholeBlock())
+ SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad())
commonTailIndex = 1;
else if (SameTails.size() == 2 &&
SameTails[1].getBlock()->isLayoutSuccessor(
- SameTails[0].getBlock()) &&
- SameTails[0].tailIsWholeBlock())
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock() &&
+ !SameTails[0].getBlock()->isEHPad())
commonTailIndex = 0;
else {
// Otherwise just pick one, favoring the fall-through predecessor if
// there is one.
for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
MachineBasicBlock *MBB = SameTails[i].getBlock();
- if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ if ((MBB == EntryBB || MBB->isEHPad()) &&
+ SameTails[i].tailIsWholeBlock())
continue;
if (MBB == PredBB) {
commonTailIndex = i;
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 0106355b1a44..6e57543c4c0f 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -443,6 +443,10 @@ def SVEUnsupported : AArch64Unsupported {
HasSVE2BitPerm];
}
+def PAUnsupported : AArch64Unsupported {
+ let F = [HasPA];
+}
+
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
@@ -453,6 +457,7 @@ include "AArch64SchedExynosM4.td"
include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
+include "AArch64SchedThunderX3T110.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -780,6 +785,25 @@ def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
FeatureLSE,
HasV8_1aOps]>;
+def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
+ "ThunderX3T110",
+ "Marvell ThunderX3 processors", [
+ FeatureAggressiveFMA,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureArithmeticBccFusion,
+ FeatureNEON,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureLSE,
+ FeaturePA,
+ FeatureUseAA,
+ FeatureBalanceFPOps,
+ FeaturePerfMon,
+ FeatureStrictAlign,
+ HasV8_3aOps]>;
+
def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
FeatureCRC,
@@ -878,6 +902,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+// Marvell ThunderX3T110 Processors.
+def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index 6fa3a462bc71..1956014b738d 100644
--- a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -118,9 +118,15 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
auto MBBI = MBB.begin();
- // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there.
- if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP ||
- MBBI->getOpcode() == AArch64::PACIBSP))
+ // Skip the meta instuctions, those will be removed anyway.
+ for (; MBBI != MBB.end() && MBBI->isMetaInstruction(); ++MBBI)
+ ;
+
+ // SCTLR_EL1.BT[01] is set to 0 by default which means
+ // PACI[AB]SP are implicitly BTI C so no BTI C instruction is needed there.
+ if (MBBI != MBB.end() && HintNum == 34 &&
+ (MBBI->getOpcode() == AArch64::PACIASP ||
+ MBBI->getOpcode() == AArch64::PACIBSP))
return;
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index a6df0f3f083c..c5ff1fcb274b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -26,7 +26,8 @@ def CortexA53Model : SchedMachineModel {
// v 1.0 Spreadsheet
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 9f566d1c7079..a760c4319005 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -31,7 +31,8 @@ def CortexA57Model : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 798ecb7508c0..5ddfe9e0e34c 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -18,7 +18,8 @@ def CycloneModel : SchedMachineModel {
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index d1734c455b2b..3272640cb3f2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -24,7 +24,8 @@ def ExynosM3Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index d2284f9fa0b5..cac86491f56d 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -24,7 +24,8 @@ def ExynosM4Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index df7402591e7b..86477b81b8bf 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -24,7 +24,8 @@ def ExynosM5Model : SchedMachineModel {
let MispredictPenalty = 15; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 92d03963de57..a17ab36d7f9e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -23,8 +23,8 @@ def FalkorModel : SchedMachineModel {
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
-
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index 0e1a24103121..ba14bf1f50de 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -27,8 +27,8 @@ def KryoModel : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
-
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index 3b6aecf5c035..9c50f9708583 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -25,8 +25,8 @@ def ThunderXT8XModel : SchedMachineModel {
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
-
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index e2a293c06877..233613f7be3a 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -25,8 +25,8 @@ def ThunderX2T99Model : SchedMachineModel {
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
-
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
new file mode 100644
index 000000000000..00838cc4b9bd
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -0,0 +1,1997 @@
+//=- AArch64SchedThunderX3T110.td - Marvell ThunderX3 T110 ---*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for Marvell ThunderX3T110
+// family of processors.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pipeline Description.
+
+def ThunderX3T110Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 micro-ops dispatched at a time.
+ let MicroOpBufferSize = 70; // 70 entries in micro-op re-order buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
+ // Determined via a mix of micro-arch details and experimentation.
+ let LoopMicroOpBufferSize = 128; // FIXME: might be much bigger in TX3.
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+let SchedModel = ThunderX3T110Model in {
+
+// Issue ports.
+
+// Port 0: ALU.
+def THX3T110P0 : ProcResource<1>;
+
+// Port 1: ALU.
+def THX3T110P1 : ProcResource<1>;
+
+// Port 2: ALU/Branch.
+def THX3T110P2 : ProcResource<1>;
+
+// Port 3: ALU/Branch.
+def THX3T110P3 : ProcResource<1>;
+
+// Port 4: Load/Store.
+def THX3T110P4 : ProcResource<1>;
+
+// Port 5: Load/store.
+def THX3T110P5 : ProcResource<1>;
+
+// Port 6: FP/Neon/SIMD/Crypto.
+def THX3T110P6FP0 : ProcResource<1>;
+
+// Port 7: FP/Neon/SIMD/Crypto.
+def THX3T110P7FP1 : ProcResource<1>;
+
+// Port 8: FP/Neon/SIMD/Crypto.
+def THX3T110P8FP2 : ProcResource<1>;
+
+// Port 9: FP/Neon/SIMD/Crypto.
+def THX3T110P9FP3 : ProcResource<1>;
+
+// Port 10: Store Data Unit.
+def THX3T110SD0 : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes.
+
+// Integer divide/mulhi micro-ops only on port I1.
+def THX3T110I1 : ProcResGroup<[THX3T110P1]>;
+
+// Branch micro-ops on ports I2/I3.
+def THX3T110I23 : ProcResGroup<[THX3T110P2, THX3T110P3]>;
+
+// Branch micro-ops on ports I1/I2/I3.
+def THX3T110I123 : ProcResGroup<[THX3T110P1, THX3T110P2, THX3T110P3]>;
+
+// Integer micro-ops on ports I0/I1/I2.
+def THX3T110I012 : ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2]>;
+
+// Integer micro-ops on ports I0/I1/I2/I3.
+def THX3T110I0123 : ProcResGroup<[THX3T110P0, THX3T110P1,
+ THX3T110P2, THX3T110P3]>;
+
+// FP micro-ops on ports FP0/FP1/FP2/FP3.
+def THX3T110FP0123 : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1,
+ THX3T110P8FP2, THX3T110P9FP3]>;
+
+// FP micro-ops on ports FP2/FP3.
+def THX3T110FP23 : ProcResGroup<[THX3T110P8FP2, THX3T110P9FP3]>;
+
+// ASIMD micro-ops on ports FP0/FP1/FP2/FP3.
+def THX3T110SIMD : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1,
+ THX3T110P8FP2, THX3T110P9FP3]>;
+
+// Store data micro-ops only on port 10.
+def THX3T110SD : ProcResGroup<[THX3T110SD0]>;
+
+// Load/store micro-ops on ports P4/P5.
+def THX3T110LS : ProcResGroup<[THX3T110P4, THX3T110P5]>;
+
+// 70 entry unified scheduler.
+def THX3T110ANY: ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2,
+ THX3T110P3, THX3T110P4, THX3T110P5,
+ THX3T110P6FP0, THX3T110P7FP1,
+ THX3T110P8FP2, THX3T110P9FP3]> {
+ let BufferSize = 70;
+}
+
+// Define commonly used write types for InstRW specializations.
+// All definitions follow the format: THX3T110Write_<NumCycles>Cyc_<Resources>.
+
+// 3 cycles on I1.
+def THX3T110Write_3Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I1.
+def THX3T110Write_4Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on I1.
+def THX3T110Write_5Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 7 cycles on I1.
+def THX3T110Write_7Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 23 cycles on I1.
+def THX3T110Write_23Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
+}
+
+// 39 cycles on I1.
+def THX3T110Write_39Cyc_I1 : SchedWriteRes<[THX3T110I1]> {
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
+}
+
+// 1 cycle on I2/I3
+def THX3T110Write_1Cyc_I23 : SchedWriteRes<[THX3T110I23]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 8 cycles on I2/I3
+def THX3T110Write_8Cyc_I23 : SchedWriteRes<[THX3T110I23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 1 cycle on I1/I2/I3
+def THX3T110Write_1Cyc_I123 : SchedWriteRes<[THX3T110I123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 8 cycles on I1/I2/I3
+def THX3T110Write_8Cyc_I123 : SchedWriteRes<[THX3T110I123]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 1 cycle on I0/I1/I2/I3.
+def THX3T110Write_1Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on I0/I1/I2/I3.
+def THX3T110Write_2Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 3 cycles on I0/I1/I2/I3.
+def THX3T110Write_3Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I0/I1/I2/I3.
+def THX3T110Write_4Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on I0/I1/I2/I3.
+def THX3T110Write_5Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on I0/I1/I2/I3.
+def THX3T110Write_6Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on I0/I1/I2/I3.
+def THX3T110Write_8Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+// 13 cycles on I0/I1/I2/I3.
+def THX3T110Write_13Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+
+// 23 cycles on I0/I1/I2/I3.
+def THX3T110Write_23Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 23;
+ let NumMicroOps = 3;
+}
+
+// 39 cycles on I0/I1/I2/I3.
+def THX3T110Write_39Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> {
+ let Latency = 39;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on F2/F3.
+def THX3T110Write_4Cyc_F23 : SchedWriteRes<[THX3T110FP23]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on F0/F1/F2/F3.
+def THX3T110Write_5Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 6 cycles on F0/F1/F2/F3.
+def THX3T110Write_6Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 7 cycles on F0/F1/F2/F3.
+def THX3T110Write_7Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on F0/F1/F2/F3.
+def THX3T110Write_8Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 10 cycles on F0/F1/F2/F3.
+def THX3T110Write_10Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+// 16 cycles on F0/F1/F2/F3.
+def THX3T110Write_16Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+ let ResourceCycles = [8];
+}
+
+// 23 cycles on F0/F1/F2/F3.
+def THX3T110Write_23Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 23;
+ let NumMicroOps = 3;
+ let ResourceCycles = [11];
+}
+
+// 1 cycle on LS0/LS1.
+def THX3T110Write_1Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+// 2 cycles on LS0/LS1.
+def THX3T110Write_2Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on LS0/LS1.
+def THX3T110Write_4Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+
+// 5 cycles on LS0/LS1.
+def THX3T110Write_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0/LS1.
+def THX3T110Write_6Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 4 + 5 cycles on LS0/LS1.
+// First resource is available after 4 cycles.
+// Second resource is available after 5 cycles.
+// Load vector pair, immed offset, Q-form [LDP/LDNP].
+def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [4, 5];
+}
+
+// 4 + 8 cycles on LS0/LS1.
+// First resource is available after 4 cycles.
+// Second resource is available after 8 cycles.
+// Load vector pair, immed offset, S/D-form [LDP/LDNP].
+def THX3T110Write_4_8Cyc_LS01 : SchedWriteRes<[THX3T110LS]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [4, 8];
+}
+
+// 11 cycles on LS0/LS1 and I1.
+def THX3T110Write_11Cyc_LS01_I1 :
+ SchedWriteRes<[THX3T110LS, THX3T110I1]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+// 1 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_1Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 1 cycles on LS0/LS1 and 2 of I0/I1/I2/I3.
+def THX3T110Write_1Cyc_LS01_I0123_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_4Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0/LS1 and 2 of I0/I1/I2/I3.
+def THX3T110Write_4Cyc_LS01_I0123_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_5Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0/LS1 and 2 of I0/I1/I2/I3.
+def THX3T110Write_5Cyc_LS01_I0123_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_6Cyc_LS01_I012 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0/LS1 and 2 of I0/I1/I2/I3.
+def THX3T110Write_6Cyc_LS01_I0123_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 1 cycle on LS0/LS1 and SD.
+def THX3T110Write_1Cyc_LS01_SD :
+ SchedWriteRes<[THX3T110LS, THX3T110SD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on LS0/LS1 and SD.
+def THX3T110Write_2Cyc_LS01_SD :
+ SchedWriteRes<[THX3T110LS, THX3T110SD]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on LS0/LS1 and SD.
+def THX3T110Write_4Cyc_LS01_SD :
+ SchedWriteRes<[THX3T110LS, THX3T110SD]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0/LS1 and SD.
+def THX3T110Write_5Cyc_LS01_SD :
+ SchedWriteRes<[THX3T110LS, THX3T110SD]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0/LS1 and SD.
+def THX3T110Write_6Cyc_LS01_SD :
+ SchedWriteRes<[THX3T110LS, THX3T110SD]> {
+ let Latency = 6;
+ let NumMicroOps = 5;
+}
+
+// 1 cycle on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_1Cyc_LS01_SD_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_2Cyc_LS01_SD_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_4Cyc_LS01_SD_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_5Cyc_LS01_SD_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0/LS1, SD and I0/I1/I2/I3.
+def THX3T110Write_6Cyc_LS01_SD_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 6;
+ let NumMicroOps = 5;
+}
+
+// 1 cycles on LS0/LS1 and F0/F1/F2/F3.
+def THX3T110Write_1Cyc_LS01_F0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110FP0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on LS0/LS1 and F0/F1/F2/F3.
+def THX3T110Write_5Cyc_LS01_F0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0/LS1 and F0/F1/F2/F3.
+def THX3T110Write_6Cyc_LS01_F0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110FP0123]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 7 cycles on LS0/LS1 and F0/F1/F2/F3.
+def THX3T110Write_7Cyc_LS01_F0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110FP0123]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on LS0/LS1 and F0/F1/F2/F3.
+def THX3T110Write_8Cyc_LS01_F0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110FP0123]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_8Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 12 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_12Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+
+// 16 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_16Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 16;
+ let NumMicroOps = 5;
+}
+
+// 24 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_24Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 24;
+ let NumMicroOps = 10;
+}
+
+// 32 cycles on LS0/LS1 and I0/I1/I2/I3.
+def THX3T110Write_32Cyc_LS01_I0123 :
+ SchedWriteRes<[THX3T110LS, THX3T110I0123]> {
+ let Latency = 32;
+ let NumMicroOps = 14;
+}
+
+// 3 cycles on F0/F1/F2/F3.
+def THX3T110Write_3Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on F0/F1/F2/F3.
+def THX3T110Write_4Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on F0/F1/F2/F3.
+def THX3T110Write_5Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 10 cycles on F0/F1/F2/F3.
+def THX3T110Write_10Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+
+// 15 cycles on F0/F1/F2/F3.
+def THX3T110Write_15Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 15;
+ let NumMicroOps = 7;
+}
+
+// 16 cycles on F0/F1/F2/F3.
+def THX3T110Write_16Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+
+// 18 cycles on F0/F1/F2/F3.
+def THX3T110Write_18Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 18;
+ let NumMicroOps = 3;
+}
+
+// 19 cycles on F0/F1/F2/F3.
+def THX3T110Write_19Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 19;
+ let NumMicroOps = 4;
+}
+
+// 20 cycles on F0/F1/F2/F3.
+def THX3T110Write_20Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+
+// 23 cycles on F0/F1/F2/F3.
+def THX3T110Write_23Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 23;
+ let NumMicroOps = 4;
+}
+
+// 3 cycles on F2/F3 and 4 cycles on F0/F1/F2/F3.
+def THX3T110Write_3_4Cyc_F23_F0123 :
+ SchedWriteRes<[THX3T110FP23, THX3T110FP0123]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ let ResourceCycles = [3, 4];
+}
+
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// 3. Instruction Tables.
+
+//---
+// 3.1 Branch Instructions
+//---
+
+// Branch, immed
+// Branch and link, immed
+// Compare and branch
+def : WriteRes<WriteBr, [THX3T110I23]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [THX3T110I23]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+//---
+// Branch
+//---
+def : InstRW<[THX3T110Write_1Cyc_I23], (instrs B, BL, BR, BLR)>;
+def : InstRW<[THX3T110Write_1Cyc_I23], (instrs Bcc)>;
+def : InstRW<[THX3T110Write_1Cyc_I23], (instrs RET)>;
+def : InstRW<[THX3T110Write_1Cyc_I23],
+ (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
+
+//---
+// 3.2 Arithmetic and Logical Instructions
+// 3.3 Move and Shift Instructions
+//---
+
+
+// ALU, basic
+// Conditional compare
+// Conditional select
+// Address generation
+def : WriteRes<WriteI, [THX3T110I0123]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// ALU, extend and/or shift
+def : WriteRes<WriteISReg, [THX3T110I0123]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : WriteRes<WriteIEReg, [THX3T110I0123]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+// Move immed
+def : WriteRes<WriteImm, [THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX3T110Write_1Cyc_I0123],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[THX3T110Write_1Cyc_I0123],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
+
+// Variable shift
+def : WriteRes<WriteIS, [THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+//---
+// 3.4 Divide and Multiply Instructions
+//---
+
+// Divide, W-form
+// Latency range of 13-23/13-39.
+def : WriteRes<WriteID32, [THX3T110I1]> {
+ let Latency = 39;
+ let ResourceCycles = [39];
+ let NumMicroOps = 4;
+}
+
+// Divide, X-form
+def : WriteRes<WriteID64, [THX3T110I1]> {
+ let Latency = 23;
+ let ResourceCycles = [23];
+ let NumMicroOps = 4;
+}
+
+// Multiply accumulate, W-form
+def : WriteRes<WriteIM32, [THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// Multiply accumulate, X-form
+def : WriteRes<WriteIM64, [THX3T110I0123]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX3T110Write_5Cyc_I012],
+// (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[THX3T110Write_5Cyc_I0123],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
+
+// Bitfield extract, two reg
+def : WriteRes<WriteExtr, [THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Multiply high
+def : InstRW<[THX3T110Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[THX3T110Write_1Cyc_I0123], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitifield move - basic
+def : InstRW<[THX3T110Write_1Cyc_I0123],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
+
+// Bitfield move, insert
+def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "^BFM")>;
+def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "(S|U)?BFM.*")>;
+
+// Count leading
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123],
+ (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AES[DE]")>;
+def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AESI?MC")>;
+def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^PMULL")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1SU0")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1[CMP]")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256SU0")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256(H|H2|SU1)")>;
+
+// CRC Instructions
+// def : InstRW<[THX3T110Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>;
+def : InstRW<[THX3T110Write_4Cyc_I1],
+ (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I1],
+ (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>;
+
+// Reverse bits/bytes
+// NOTE: Handled by WriteI.
+
+//---
+// 3.6 Load Instructions
+// 3.10 FP Load Instructions
+//---
+
+// Load register, literal
+// Load register, unscaled immed
+// Load register, immed unprivileged
+// Load register, unsigned immed
+def : WriteRes<WriteLD, [THX3T110LS]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+// Load register, immed post-index
+// NOTE: Handled by WriteLD, WriteI.
+// Load register, immed pre-index
+// NOTE: Handled by WriteLD, WriteAdr.
+def : WriteRes<WriteAdr, [THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+// Load register offset, basic
+// Load register, register offset, scale by 4/8
+// Load register, register offset, scale by 2
+// Load register offset, extend
+// Load register, register offset, extend, scale by 4/8
+// Load register, register offset, extend, scale by 2
+def THX3T110WriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [THX3T110Write_4Cyc_LS01_I0123_I0123]>,
+ SchedVar<NoSchedPred, [THX3T110Write_4Cyc_LS01_I0123]>]>;
+def : SchedAlias<WriteLDIdx, THX3T110WriteLDIdx>;
+
+def THX3T110ReadAdrBase : SchedReadVariant<[
+ SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+def : SchedAlias<ReadAdrBase, THX3T110ReadAdrBase>;
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRBui)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDui)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRHui)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRQui)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRSui)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDl)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRQl)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRWl)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRXl)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRBi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRHi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRXi)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSWi)>;
+
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr],
+ (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre,
+ LDRSpre, LDRWpre, LDRXpre,
+ LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
+ LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
+ LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>;
+
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;
+
+def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteI],
+ (instrs LDRBpost, LDRDpost, LDRHpost,
+ LDRQpost, LDRSpost, LDRWpost, LDRXpost)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteAdr],
+ (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre,
+ LDRSpre, LDRWpre, LDRXpre)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr],
+ (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteI],
+ (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost,
+ LDRSpost, LDRWpost, LDRXpost)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHWi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSWi)>;
+
+// Load exclusive
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAR(B|H|W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXR(B|H|W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXR(B|H|W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXP(W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXP(W|X)$")>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMl)>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFUMi)>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMui)>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroW)>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroX)>;
+
+//--
+// 3.7 Store Instructions
+// 3.11 FP Store Instructions
+//--
+
+// Store register, unscaled immed
+// Store register, immed unprivileged
+// Store register, unsigned immed
+def : WriteRes<WriteST, [THX3T110LS, THX3T110SD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Store register, immed post-index
+// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteAdr, WriteST
+
+// Store register, register offset, basic
+// Store register, register offset, scaled by 4/8
+// Store register, register offset, scaled by 2
+// Store register, register offset, extend
+// Store register, register offset, extend, scale by 4/8
+// Store register, register offset, extend, scale by 1
+def : WriteRes<WriteSTIdx, [THX3T110LS, THX3T110SD, THX3T110I0123]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Store pair, immed offset, W-form
+// Store pair, immed offset, X-form
+def : WriteRes<WriteSTP, [THX3T110LS, THX3T110SD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-index, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteAdr, WriteSTP.
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBBi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURDi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHHi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURQi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURSi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURWi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRXi)>;
+
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPDi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPQi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPXi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPWi)>;
+
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPDi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPQi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPXi)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPWi)>;
+
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRBui)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRDui)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRHui)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRQui)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRXui)>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRWui)>;
+
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRBui)>;
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRDui)>;
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRHui)>;
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRQui)>;
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRXui)>;
+def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRWui)>;
+
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRBui)>;
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRDui)>;
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRHui)>;
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRQui)>;
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRXui)>;
+def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRWui)>;
+
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
+// Store exclusive
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instrs STNPWi, STNPXi)>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXP(W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXR(B|H|W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXP(W|X)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXR(B|H|W|X)$")>;
+
+//---
+// 3.8 FP Data Processing Instructions
+//---
+
+// FP absolute value
+// FP min/max
+// FP negate
+def : WriteRes<WriteF, [THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// FP arithmetic
+def : InstRW<[THX3T110Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
+
+// FP compare
+def : WriteRes<WriteFCmp, [THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFDiv, [THX3T110FP0123]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THX3T110XWriteFDiv : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX3T110XWriteFDivSP : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX3T110XWriteFDivDP : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+def THX3T110XWriteFSqrtSP : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX3T110XWriteFSqrtDP : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[THX3T110XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THX3T110XWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[THX3T110XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THX3T110XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THX3T110Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>;
+
+// FP divide, D-form
+// FP square root, D-form
+def : InstRW<[THX3T110XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THX3T110XWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[THX3T110XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THX3T110XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[THX3T110Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>;
+
+// FP multiply
+// FP multiply accumulate
+def : WriteRes<WriteFMul, [THX3T110FP0123]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX3T110XWriteFMul : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX3T110XWriteFMulAcc : SchedWriteRes<[THX3T110FP0123]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def : InstRW<[THX3T110XWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[THX3T110XWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
+
+// FP round to integral
+def : InstRW<[THX3T110Write_7Cyc_F01],
+ (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// FP select
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FCSEL")>;
+
+//---
+// 3.9 FP Miscellaneous Instructions
+//---
+
+// FP convert, from vec to vec reg
+// FP convert, from gen to vec reg
+// FP convert, from vec to gen reg
+def : WriteRes<WriteFCvt, [THX3T110FP0123]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// FP move, immed
+// FP move, register
+def : WriteRes<WriteFImm, [THX3T110FP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// FP transfer, from gen to vec reg
+// FP transfer, from vec to gen reg
+def : WriteRes<WriteFCopy, [THX3T110FP0123]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
+
+//---
+// 3.12 ASIMD Integer Instructions
+//---
+
+// ASIMD absolute diff, D-form
+// ASIMD absolute diff, Q-form
+// ASIMD absolute diff accum, D-form
+// ASIMD absolute diff accum, Q-form
+// ASIMD absolute diff accum long
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD compare
+// ASIMD logical (AND, BIC, EOR)
+// ASIMD max/min, basic
+// ASIMD max/min, reduce, 4H/4S
+// ASIMD max/min, reduce, 8B/8H
+// ASIMD max/min, reduce, 16B
+// ASIMD multiply, D-form
+// ASIMD multiply, Q-form
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+// ASIMD multiply long
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic, D-form
+// ASIMD shift by immed and insert, basic, Q-form
+// ASIMD shift by immed, complex
+// ASIMD shift by register, basic, D-form
+// ASIMD shift by register, basic, Q-form
+// ASIMD shift by register, complex, D-form
+// ASIMD shift by register, complex, Q-form
+def : WriteRes<WriteV, [THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+
+// ASIMD arith, reduce, 4H/4S
+// ASIMD arith, reduce, 8B/8H
+// ASIMD arith, reduce, 16B
+
+// ASIMD logical (MVN (alias for NOT), ORN, ORR)
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+
+// ASIMD arith, reduce
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
+
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B/16H
+def : InstRW<[THX3T110Write_10Cyc_F0123],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B/16H
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^(P?MUL|SQR?DMULH)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, D-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv",
+ "SQSHRNv","SQSHRUNv", "UQRSHRNv",
+ "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F01],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADALP","^UADALP")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLPv","^UADDLPv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLV","^UADDLV")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SUQADDv","^USQADDv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv",
+ "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUBHNv", "^SUQADD",
+ "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv",
+ "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>;
+
+//---
+// 3.13 ASIMD Floating-point Instructions
+//---
+
+// ASIMD FP absolute value
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FABSv")>;
+
+// ASIMD FP arith, normal, D-form
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// ASIMD FP arith,pairwise, D-form
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FADDPv")>;
+
+// ASIMD FP compare, D-form
+// ASIMD FP compare, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+// ASIMD FP convert, long
+// ASIMD FP convert, narrow
+// ASIMD FP convert, other, D-form
+// ASIMD FP convert, other, Q-form
+// NOTE: Handled by WriteV.
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[THX3T110Write_5Cyc_F01], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[THX3T110Write_5Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv2f32)>;
+def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv2f32")>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv4f32)>;
+def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv4f32")>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[THX3T110Write_23Cyc_F0123], (instrs FDIVv2f64)>;
+def : InstRW<[THX3T110Write_23Cyc_F0123], (instregex "FDIVv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXv", "^FMAXNMv",
+ "^FMINv", "^FMINNMv")>;
+
+// ASIMD FP max/min, pairwise, D-form
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXPv", "^FMAXNMPv",
+ "^FMINPv", "^FMINNMPv")>;
+
+// ASIMD FP max/min, reduce
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXVv", "^FMAXNMVv",
+ "^FMINVv", "^FMINNMVv")>;
+
+// ASIMD FP multiply, D-form, FZ
+// ASIMD FP multiply, D-form, no FZ
+// ASIMD FP multiply, Q-form, FZ
+// ASIMD FP multiply, Q-form, no FZ
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, Dform, FZ
+// ASIMD FP multiply accumulate, Dform, no FZ
+// ASIMD FP multiply accumulate, Qform, FZ
+// ASIMD FP multiply accumulate, Qform, no FZ
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP negate
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FNEGv")>;
+
+//--
+// 3.14 ASIMD Miscellaneous Instructions
+//--
+
+// ASIMD bit reverse
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^RBITv")>;
+
+// ASIMD bitwise insert, D-form
+// ASIMD bitwise insert, Q-form
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
+
+// ASIMD count, D-form
+// ASIMD count, Q-form
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
+
+// ASIMD duplicate, gen reg
+// ASIMD duplicate, element
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^CPY")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^EXTv")>;
+
+// ASIMD extract narrow
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^XTNv")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
+
+// ASIMD insert, element to element
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>;
+
+// ASIMD move, integer immed
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^MOVIv")>;
+
+// ASIMD move, FP immed
+def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FMOVv")>;
+
+// ASIMD transpose
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
+// ASIMD reciprocal estimate, D-form
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
+ "^FRSQRTEv", "^URSQRTEv")>;
+
+// ASIMD reciprocal step, D-form, FZ
+// ASIMD reciprocal step, D-form, no FZ
+// ASIMD reciprocal step, Q-form, FZ
+// ASIMD reciprocal step, Q-form, no FZ
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD reverse
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^REV16v", "^REV32v", "^REV64v")>;
+
+// ASIMD table lookup, D-form
+// ASIMD table lookup, Q-form
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
+def : InstRW<[THX3T110Write_10Cyc_F0123],
+ (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
+def : InstRW<[THX3T110Write_15Cyc_F0123],
+ (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
+def : InstRW<[THX3T110Write_20Cyc_F0123],
+ (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to word or word
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(S|U)MOVv.*")>;
+
+// ASIMD transfer gen reg to element
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>;
+
+// ASIMD transpose
+def : InstRW<[THX3T110Write_5Cyc_F0123],
+ (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^ZIP1v", "^ZIP2v")>;
+
+//--
+// 3.15 ASIMD Load Instructions
+//--
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[THX3T110Write_4Cyc_LS01],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[THX3T110Write_4Cyc_LS01],
+ (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[THX3T110Write_5Cyc_LS01],
+ (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[THX3T110Write_6Cyc_LS01],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_6Cyc_LS01, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123],
+ (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123],
+ (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_8Cyc_LS01_F0123],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lone, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[THX3T110Write_7Cyc_LS01_F0123],
+ (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[THX3T110Write_7Cyc_LS01_F0123],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_8Cyc_LS01_F0123],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[THX3T110Write_6Cyc_LS01_F0123],
+ (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[THX3T110Write_6Cyc_LS01_F0123],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//--
+// 3.16 ASIMD Store Instructions
+//--
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[THX3T110Write_1Cyc_LS01],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[THX3T110Write_1Cyc_LS01],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[THX3T110Write_1Cyc_LS01],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[THX3T110Write_1Cyc_LS01],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H
+// ASIMD store, 4 element, one lane, S
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123],
+ (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr],
+ (instregex "^ST4i(8|16|32|64)_POST$")>;
+
+// V8.1a Atomics (LSE)
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+ LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+ LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+ LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+ LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+ LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+ LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+ LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+ LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+ LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+ LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+ LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+ LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic],
+ (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic],
+ (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic],
+ (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
+// V8.3a PAC
+def : InstRW<[THX3T110Write_11Cyc_LS01_I1], (instregex "^LDRAA", "^LDRAB")>;
+def : InstRW<[THX3T110Write_8Cyc_I123],
+ (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
+ BRAA, BRAAZ, BRAB, BRABZ)>;
+def : InstRW<[THX3T110Write_8Cyc_I123], (instrs RETAA, RETAB)>;
+
+} // SchedModel = ThunderX3T110Model
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 3636d8d2b628..079e8f1764dc 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -160,6 +160,17 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 4;
PrefLoopLogAlignment = 2;
break;
+ case ThunderX3T110:
+ CacheLineSize = 64;
+ PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 2;
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 79c2c161d3cb..40eb67a153e4 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -63,7 +63,8 @@ public:
ThunderXT81,
ThunderXT83,
ThunderXT88,
- TSV110
+ TSV110,
+ ThunderX3T110
};
protected:
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 39d16e7999cd..1ac291fcb887 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -404,7 +404,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
Register Reg = MO.getReg();
- bool EmitPercent = true;
+ bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT;
if (!X86::GR8RegClass.contains(Reg) &&
!X86::GR16RegClass.contains(Reg) &&
@@ -443,6 +443,42 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
return false;
}
+static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO,
+ char Mode, raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT;
+
+ unsigned Index;
+ if (X86::VR128XRegClass.contains(Reg))
+ Index = Reg - X86::XMM0;
+ else if (X86::VR256XRegClass.contains(Reg))
+ Index = Reg - X86::YMM0;
+ else if (X86::VR512RegClass.contains(Reg))
+ Index = Reg - X86::ZMM0;
+ else
+ return true;
+
+ switch (Mode) {
+ default: // Unknown mode.
+ return true;
+ case 'x': // Print V4SFmode register
+ Reg = X86::XMM0 + Index;
+ break;
+ case 't': // Print V8SFmode register
+ Reg = X86::YMM0 + Index;
+ break;
+ case 'g': // Print V16SFmode register
+ Reg = X86::ZMM0 + Index;
+ break;
+ }
+
+ if (EmitPercent)
+ O << '%';
+
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+ return false;
+}
+
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -517,6 +553,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
PrintOperand(MI, OpNo, O);
return false;
+ case 'x': // Print V4SFmode register
+ case 't': // Print V8SFmode register
+ case 'g': // Print V16SFmode register
+ if (MO.isReg())
+ return printAsmVRegister(*this, MO, ExtraCode[0], O);
+ PrintOperand(MI, OpNo, O);
+ return false;
+
case 'P': // This is the operand of a call, treat specially.
PrintPCRelImm(MI, OpNo, O);
return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 60eefbc677da..1523d56cc4e7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23319,7 +23319,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
- Elts.push_back(CurrentOp);
+ // Must produce 0s in the correct bits.
+ Elts.push_back(DAG.getConstant(0, dl, ElementType));
continue;
}
auto *ND = cast<ConstantSDNode>(CurrentOp);
@@ -23331,7 +23332,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
- Elts.push_back(CurrentOp);
+ // Must produce 0s in the correct bits.
+ Elts.push_back(DAG.getConstant(0, dl, ElementType));
continue;
}
auto *ND = cast<ConstantSDNode>(CurrentOp);
@@ -23343,7 +23345,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
- Elts.push_back(CurrentOp);
+ // All shifted in bits must be the same so use 0.
+ Elts.push_back(DAG.getConstant(0, dl, ElementType));
continue;
}
auto *ND = cast<ConstantSDNode>(CurrentOp);
@@ -39699,14 +39702,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
- for (APInt &Elt : EltBits) {
- if (X86ISD::VSHLI == Opcode)
+ // Undef elements need to fold to 0. It's possible SimplifyDemandedBits
+ // created an undef input due to no input bits being demanded, but user
+ // still expects 0 in other bits.
+ for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
+ APInt &Elt = EltBits[i];
+ if (UndefElts[i])
+ Elt = 0;
+ else if (X86ISD::VSHLI == Opcode)
Elt <<= ShiftVal;
else if (X86ISD::VSRAI == Opcode)
Elt.ashrInPlace(ShiftVal);
else
Elt.lshrInPlace(ShiftVal);
}
+ // Reset undef elements since they were zeroed above.
+ UndefElts = 0;
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 245346d82731..90484241c28c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3956,6 +3956,8 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP32r));
}
+ MIB->RemoveOperand(1);
+ MIB->addImplicitDefUseOperands(*MBB.getParent());
// Build CFI if necessary.
MachineFunction &MF = *MBB.getParent();
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index d8d7acae5c9f..81163e9fcfab 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -527,19 +527,19 @@ namespace {
// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
- PHINode *PN;
-
- // Ith incoming value.
- unsigned Ith;
-
- // Exit value after expansion.
- Value *Val;
-
- // High Cost when expansion.
- bool HighCost;
-
- RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
- : PN(P), Ith(I), Val(V), HighCost(H) {}
+ PHINode *PN; // For which PHI node is this replacement?
+ unsigned Ith; // For which incoming value?
+ const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
+ Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
+ bool HighCost; // Is this expansion a high-cost?
+
+ Value *Expansion = nullptr;
+ bool ValidRewrite = false;
+
+ RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
+ bool H)
+ : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
+ HighCost(H) {}
};
} // end anonymous namespace
@@ -671,41 +671,65 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
hasHardUserWithinLoop(L, Inst))
continue;
+ // Check if expansions of this SCEV would count as being high cost.
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
- Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
-
- LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
- << '\n'
- << " LoopVal = " << *Inst << "\n");
-
- if (!isValidRewrite(Inst, ExitVal)) {
- DeadInsts.push_back(ExitVal);
- continue;
- }
-#ifndef NDEBUG
- // If we reuse an instruction from a loop which is neither L nor one of
- // its containing loops, we end up breaking LCSSA form for this loop by
- // creating a new use of its instruction.
- if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
- if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
- if (EVL != L)
- assert(EVL->contains(L) && "LCSSA breach detected!");
-#endif
+ // Note that we must not perform expansions until after
+ // we query *all* the costs, because if we perform temporary expansion
+ // inbetween, one that we might not intend to keep, said expansion
+ // *may* affect cost calculation of the the next SCEV's we'll query,
+ // and next SCEV may errneously get smaller cost.
// Collect all the candidate PHINodes to be rewritten.
- RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
+ RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost);
}
}
}
+ // Now that we've done preliminary filtering and billed all the SCEV's,
+ // we can perform the last sanity check - the expansion must be valid.
+ for (RewritePhi &Phi : RewritePhiSet) {
+ Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
+ Phi.ExpansionPoint);
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
+ << *(Phi.Expansion) << '\n'
+ << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
+
+ // FIXME: isValidRewrite() is a hack. it should be an assert, eventually.
+ Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion);
+ if (!Phi.ValidRewrite) {
+ DeadInsts.push_back(Phi.Expansion);
+ continue;
+ }
+
+#ifndef NDEBUG
+ // If we reuse an instruction from a loop which is neither L nor one of
+ // its containing loops, we end up breaking LCSSA form for this loop by
+ // creating a new use of its instruction.
+ if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
+ if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+ if (EVL != L)
+ assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+ }
+
+ // TODO: after isValidRewrite() is an assertion, evaluate whether
+ // it is beneficial to change how we calculate high-cost:
+ // if we have SCEV 'A' which we know we will expand, should we calculate
+ // the cost of other SCEV's after expanding SCEV 'A',
+ // thus potentially giving cost bonus to those other SCEV's?
+
bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
bool Changed = false;
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
+ if (!Phi.ValidRewrite)
+ continue;
+
PHINode *PN = Phi.PN;
- Value *ExitVal = Phi.Val;
+ Value *ExitVal = Phi.Expansion;
// Only do the rewrite when the ExitValue can be expanded cheaply.
// If LoopCanBeDel is true, rewrite exit value aggressively.
@@ -844,6 +868,8 @@ bool IndVarSimplify::canLoopBeDeleted(
// phase later. Skip it in the loop invariant check below.
bool found = false;
for (const RewritePhi &Phi : RewritePhiSet) {
+ if (!Phi.ValidRewrite)
+ continue;
unsigned i = Phi.Ith;
if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
found = true;
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index da68d3713b40..d6b01a12b937 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -369,7 +369,8 @@ Value *Mapper::mapValue(const Value *V) {
if (NewTy != IA->getFunctionType())
V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
- IA->hasSideEffects(), IA->isAlignStack());
+ IA->hasSideEffects(), IA->isAlignStack(),
+ IA->getDialect());
}
return getVM()[V] = const_cast<Value *>(V);